diff --git a/.dockerignore b/.dockerignore deleted file mode 120000 index 3e4e48b0b5fe6b468434d6767749b399319f2da2..0000000000000000000000000000000000000000 --- a/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.gitignore \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..2b2e74053d33cb6d2878fd3d6da48fa344172f63 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +*.DS_Store +build/ +*.user +.vscode +.idea +.project +.cproject +.pydevproject +Makefile +.test_env/ +third_party/ +*~ +bazel-* + +!build/*.deb diff --git a/.gitignore b/.gitignore index 6aae076a49012b032b8fc0f1dc02c2714fb7b4a3..ee7c6ec370cd7c1f3435b41d915e24023c456af7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ build/ .project .cproject .pydevproject +.settings/ Makefile .test_env/ third_party/ diff --git a/CMakeLists.txt b/CMakeLists.txt index e991a9a0ea0cf2a3d4f5f1e900bfc38e703aaf39..e78ccdf6d3b09c0170df5bd091fd1620b131216a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF) option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) -option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF) +option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) @@ -90,14 +90,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") set(EXTERNAL_LIBS - # have not include gtest here. 
${GFLAGS_LIBRARIES} ${GLOG_LIBRARIES} ${CBLAS_LIBRARIES} ${PROTOBUF_LIBRARY} ${ZLIB_LIBRARIES} + ${PYTHON_LIBRARIES} ) +if(WITH_GPU) + list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) + if(NOT WITH_DSO) + list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) + endif(NOT WITH_DSO) +endif(WITH_GPU) + add_subdirectory(proto) add_subdirectory(paddle) add_subdirectory(python) diff --git a/Dockerfile b/Dockerfile index 536adb0716447aa8b8c10beef8b974ae3f016f05..ccd43be668e7acb1a82bb88f5938755a5d3974d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,20 +3,17 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Authors -ARG DEBIAN_FRONTEND=noninteractive ARG UBUNTU_MIRROR RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' # ENV variables ARG BUILD_WOBOQ -ARG BUILD_AND_INSTALL ARG WITH_GPU ARG WITH_AVX ARG WITH_DOC ARG WITH_STYLE_CHECK ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF} -ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF} ENV WITH_GPU=${WITH_AVX:-OFF} ENV WITH_AVX=${WITH_AVX:-ON} ENV WITH_DOC=${WITH_DOC:-OFF} @@ -31,7 +28,7 @@ RUN apt-get update && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ - apt-get install -y automake locales clang-format-3.8 && \ + apt-get install -y automake locales clang-format-3.8 swig && \ apt-get clean -y # git credential to skip password typing @@ -51,8 +48,6 @@ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd .. && rm -rf cmake-3.4.1 -RUN apt-get install -y swig - VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service diff --git a/README.md b/README.md index 8a8e15841586ae6a01bb93e94f6074189f556f5a..bcc24b84128df282a2e3f0bc62aafe1ffe172338 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) -[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/) -[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/) +[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/) [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl the capability of PaddlePaddle to make a huge impact for your product.
## Installation -Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from -pre-built packages (**docker image**, **deb package**) or -directly build on **Linux** and **Mac OS X** from the source code. + +It is recommended to check out the +[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) +before looking into the +[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html). ## Documentation -Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers. -- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) - You can follow the quick start tutorial to learn how use PaddlePaddle - step-by-step. +We provide [English](http://www.paddlepaddle.org/develop/doc/) and +[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation. + +- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) + + You might want to start from this online interactive book, which runs in Jupyter Notebook. + +- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) + + You can run distributed training jobs on MPI clusters. + +- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html) -- [Example and Demo](http://paddlepaddle.org/doc/demo/)
- We provide five demos, including: image classification, sentiment analysis, - sequence to sequence model, recommendation, semantic role labeling. + You can also run distributed training jobs on Kubernetes clusters. -- [Distributed Training](http://paddlepaddle.org/doc/cluster)
- This system supports training deep learning models on multiple machines - with data parallelism. +- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html) -- [Python API](http://paddlepaddle.org/doc/ui/)
- PaddlePaddle supports using either Python interface or C++ to build your - system. We also use SWIG to wrap C++ source code to create a user friendly - interface for Python. You can also use SWIG to create interface for your - favorite programming language. + Our new API enables much shorter programs. -- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html)
- We sincerely appreciate your interest and contributions. If you would like to - contribute, please read the contribution guide. +- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html) -- [Source Code Documents](http://paddlepaddle.org/doc/source/)
+ We appreciate your contributions! ## Ask Questions diff --git a/cmake/coveralls.cmake b/cmake/coveralls.cmake index 9be7643819efdde3f42e4d39b2849ecc17e0d9fb..ca1471cabb57c0795ee193493d2e60bb5bd9e1cc 100644 --- a/cmake/coveralls.cmake +++ b/cmake/coveralls.cmake @@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH) endif() endfunction() -if(ON_COVERALLS) +if(WITH_COVERAGE) set(CMAKE_BUILD_TYPE "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake index ad9a10cb8616159b9e3aff445e698cb2edb92820..4641184fcf5273b884524d9b9444209ffb65e000 100644 --- a/cmake/coverallsGcovJsons.cmake +++ b/cmake/coverallsGcovJsons.cmake @@ -134,7 +134,7 @@ foreach(GCDA ${GCDA_FILES}) # If -p is not specified then the file is named only "the_file.c.gcov" # execute_process( - COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null" + COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null WORKING_DIRECTORY ${GCDA_DIR} ) endforeach() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 29d17691db9f4575bae4372c61a0e1964e163fc9..00dde9a9fdd4d4825947b987b3e8e0460f4a5f3a 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -45,7 +45,7 @@ IF(NOT ${CBLAS_FOUND}) PREFIX ${CBLAS_SOURCES_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR} BUILD_IN_SOURCE 1 - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX= UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 446a7532c55bd3ca66662efe70db93551580b8cc..ad1426fd940c7b163668c33d41731fe75d89dd89 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,7 +14,8 @@ INCLUDE(ExternalProject) -FIND_PACKAGE(Protobuf 3.1) +set(PROTOBUF_VERSION 3.1) +FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION}) IF(PROTOBUF_FOUND) EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) diff --git a/cmake/util.cmake b/cmake/util.cmake index 3640e4651fdd8b491f63875a7ea886afcadf978a..bacb64eb9ee65fffc824e4587a22fc432c092b19 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -90,26 +90,6 @@ function(link_paddle_exe TARGET_NAME) ${RDMA_LD_FLAGS} ${RDMA_LIBS}) - if(WITH_PYTHON) - target_link_libraries(${TARGET_NAME} - ${PYTHON_LIBRARIES} util) - endif() - - if(WITH_GPU) - target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY}) - if(NOT WITH_DSO OR WITH_METRIC) - target_link_libraries(${TARGET_NAME} - ${CUDNN_LIBRARY} - ${CUDA_curand_LIBRARY}) - CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME}) - endif() - - check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME ) - if(HAVE_CLOCK_GETTIME) - target_link_libraries(${TARGET_NAME} rt) - endif() - endif() - add_dependencies(${TARGET_NAME} ${external_project_dependencies}) endfunction() diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index db33a20487e579cda67a01c52ee646829df0f4e6..05817ec85455ac58566e90956a54cb86541f8488 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -109,6 +109,12 @@ 
sum_to_one_norm :members: sum_to_one_norm :noindex: +cross_channel_norm +------------------ +.. automodule:: paddle.v2.layer + :members: cross_channel_norm + :noindex: + Recurrent Layers ================ diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md index d9d54bff3096cb3520409971dbd1b2e179ac8be1..69f4501f370dcc9d603ec54a63d68568d66e832e 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ b/doc/getstarted/build_and_install/build_from_source_en.md @@ -51,7 +51,7 @@ PaddlePaddle supports some build options. WITH_TIMER: Compile PaddlePaddle with stats timer WITH_PROFILER: Compile PaddlePaddle with GPU profiler WITH_DOC: Compile PaddlePaddle with documentation -ON_COVERALLS: Compile PaddlePaddle with code coverage +WITH_COVERAGE: Compile PaddlePaddle with code coverage COVERALLS_UPLOAD: Package code coverage data to coveralls ON_TRAVIS: Exclude special unit test on Travis CI diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 6e8fcd114df580a00858d95f0af0d1ec0bd9b4a2..3760c6727c21cfb32ca4d2efc30351352c9b182b 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -1,21 +1,3 @@ -FUNCTION(generate_python_api target_name) - ADD_CUSTOM_COMMAND(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py - ${PROJ_ROOT}/paddle/Paddle_wrap.cxx - ${PROJ_ROOT}/paddle/Paddle_wrap.h - COMMAND ${SWIG_EXECUTABLE} -python -c++ -outcurrentdir -I../ api/Paddle.swig - && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py - DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig - ${PROJ_ROOT}/paddle/api/PaddleAPI.h - ${external_project_dependencies} - WORKING_DIRECTORY ${PROJ_ROOT}/paddle - COMMENT "Generate Python API from swig") - ADD_CUSTOM_TARGET(${target_name} ALL DEPENDS - ${PROJ_ROOT}/paddle/Paddle_wrap.cxx - ${PROJ_ROOT}/paddle/Paddle_wrap.h - ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py - ${external_project_dependencies}) -ENDFUNCTION(generate_python_api) - set(API_SOURCES Arguments.cpp ConfigParser.cpp @@ -33,65 +15,86 @@ set(API_HEADER PaddleAPI.h Internal.h) -add_library(paddle_api STATIC - ${API_SOURCES}) +add_library(paddle_api STATIC ${API_SOURCES}) add_dependencies(paddle_api gen_proto_cpp) -list(LENGTH "${GFLAGS_LIBRARIES}" GFLAGS_LIBRARIES_LENGTH) +INCLUDE(${SWIG_USE_FILE}) +INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) -if(${GFLAGS_LIBRARIES_LENGTH} EQUAL 0 AND TARGET "${GFLAGS_LIBRARIES}") -# Because gflags compiled by cmake, so it is imported by cmake target, -# not a real library path. Get the real library path here.
-message(STATUS "GFLAGS Libraries is ${GFLAGS_LIBRARIES}") -get_target_property(GFLAGS_LOCATION ${GFLAGS_LIBRARIES} LOCATION) -message(STATUS "GFLAGS Target location is ${GFLAGS_LOCATION}") -else() -set(GFLAGS_LOCATION ${GFLAGS_LIBRARIES}) -endif() +FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) + +SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) + +SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}) +SET(CMAKE_CXX_FLAGS "-std=c++11 -fPIC -Wall") +IF(WITH_COVERAGE) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") +ENDIF(WITH_COVERAGE) -configure_file( - paddle_api_config.py.in - ${PROJ_ROOT}/paddle/api/paddle_api_config.py +SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS + paddle_parameter + paddle_function + paddle_math + paddle_utils + paddle_gserver + paddle_pserver + paddle_api + paddle_cuda + paddle_trainer_lib + paddle_network + paddle_proto + ${external_project_dependencies} ) -generate_python_api(python_swig_sources) +IF(APPLE) + SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load") +ELSE(APPLE) + SET(START_GROUP "-Xlinker -start-group") + SET(END_GROUP "-Xlinker -end-group") + SET(ARCHIVE_START "-Wl,--whole-archive") + SET(ARCHIVE_END "-Wl,--no-whole-archive") +ENDIF(APPLE) -file(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) +SWIG_ADD_MODULE(swig_paddle python Paddle.i) +SWIG_LINK_LIBRARIES(swig_paddle + ${MACOS_LD_FLAGS} + ${START_GROUP} + ${ARCHIVE_START} + paddle_gserver + paddle_function + ${METRIC_LIBS} + ${ARCHIVE_END} + paddle_pserver + paddle_trainer_lib + paddle_network + paddle_parameter + paddle_math + paddle_utils + paddle_proto + paddle_cuda + paddle_api + ${CMAKE_DL_LIBS} + ${EXTERNAL_LIBS} + ${CMAKE_THREAD_LIBS_INIT} + ${RDMA_LD_FLAGS} + ${RDMA_LIBS} + ${START_END} +) -# TODO(yuyang18) : make wheel name calculated by cmake -add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/dist/.timestamp +add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp COMMAND rm -rf py_paddle.egg-info build WORKING_DIRECTORY ${PROJ_ROOT}/paddle - DEPENDS python_swig_sources - paddle_parameter - paddle_function - paddle_math - paddle_utils - paddle_gserver - paddle_pserver - paddle_trainer - paddle_api - paddle_cuda - ${PY_PADDLE_PYTHON_FILES} + DEPENDS _swig_paddle ) -install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ - DESTINATION opt/paddle/share/wheels -) +# TODO(yuyang18) : make wheel name calculated by cmake +add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) -add_custom_target(python_api_wheel ALL DEPENDS - ${PROJ_ROOT}/paddle/dist/.timestamp) -add_dependencies(python_api_wheel python_swig_sources - paddle_parameter - paddle_math - paddle_utils - paddle_gserver - paddle_pserver - paddle_trainer - paddle_api - paddle_cuda) +install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ DESTINATION opt/paddle/share/wheels) if(WITH_TESTING) IF(NOT PY_PIP_FOUND) diff --git a/paddle/api/Paddle.swig b/paddle/api/Paddle.i similarity index 100% rename from paddle/api/Paddle.swig rename to paddle/api/Paddle.i diff --git a/paddle/api/paddle_api_config.py.in b/paddle/api/paddle_api_config.py.in deleted file mode 100644 index 
82f45ba6ccec49eb190d1814a67a575f311689e8..0000000000000000000000000000000000000000 --- a/paddle/api/paddle_api_config.py.in +++ /dev/null @@ -1,17 +0,0 @@ -PADDLE_BUILD_DIR="@CMAKE_CURRENT_BINARY_DIR@/../" -WITH_GPU="@WITH_GPU@" -PROTOBUF_LIBRARY="@PROTOBUF_LIBRARY@" -ZLIB_LIBRARIES="@ZLIB_LIBRARIES@" -CMAKE_THREAD_LIB="@CMAKE_THREAD_LIBS_INIT@" -CMAKE_DL_LIBS="@CMAKE_DL_LIBS@" - - -WITH_PYTHON="@WITH_PYTHON@" -PYTHON_LIBRARIES="@PYTHON_LIBRARIES@" -GLOG_LIBRARIES="@GLOG_LIBRARIES@" -GFLAGS_LIBRARIES="@GFLAGS_LIBRARIES@" -GFLAGS_LOCATION="@GFLAGS_LOCATION@" -CBLAS_LIBRARIES="@CBLAS_LIBRARIES@" - -CUDA_LIBRARIES="@CUDA_CUDART_LIBRARY@" -WITH_COVERALLS="@ON_COVERALLS@" diff --git a/paddle/api/paddle_ld_flags.py b/paddle/api/paddle_ld_flags.py deleted file mode 100644 index ad5dce209bf8e14120320a58c3cd85d6f6a97688..0000000000000000000000000000000000000000 --- a/paddle/api/paddle_ld_flags.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -try: - from paddle_api_config import * - import os.path - import platform - - system = platform.system().lower() - is_osx = (system == 'darwin') - is_win = (system == 'windows') - is_lin = (system == 'linux') - - if is_lin: - whole_start = "-Wl,--whole-archive" - whole_end = "-Wl,--no-whole-archive" - elif is_osx: - whole_start = "" - whole_end = "" - - LIB_DIRS = [ - "math", 'function', 'utils', 'parameter', "gserver", "api", "cuda", - "pserver", "trainer" - ] - PARENT_LIB_DIRS = ['proto'] - - class PaddleLDFlag(object): - def __init__(self): - self.paddle_build_dir = PADDLE_BUILD_DIR - self.paddle_build_dir = os.path.abspath(self.paddle_build_dir) - self.with_gpu = PaddleLDFlag.cmake_bool(WITH_GPU) - self.protolib = PROTOBUF_LIBRARY - self.zlib = ZLIB_LIBRARIES - self.thread = CMAKE_THREAD_LIB - self.dl_libs = CMAKE_DL_LIBS - self.with_python = PaddleLDFlag.cmake_bool(WITH_PYTHON) - self.python_libs = PYTHON_LIBRARIES - - self.glog_libs = GLOG_LIBRARIES - - self.with_coverage = PaddleLDFlag.cmake_bool(WITH_COVERALLS) - self.gflags_libs = GFLAGS_LIBRARIES - self.gflags_location = GFLAGS_LOCATION - self.cblas_libs = CBLAS_LIBRARIES - self.curt = CUDA_LIBRARIES - - def ldflag_str(self): - return " ".join( - [self.libs_dir_str(), self.parent_dir_str(), self.libs_str()]) - - def libs_dir_str(self): - libdirs = LIB_DIRS - return " ".join( - map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x), - libdirs)) - - def parent_dir_str(self): - libdirs = PARENT_LIB_DIRS - return " ".join( - map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x), - libdirs)) - - def libs_str(self): - libs = [ - whole_start, - "-lpaddle_gserver", - "-lpaddle_function", - whole_end, - "-lpaddle_pserver", - "-lpaddle_trainer_lib", - "-lpaddle_network", - '-lpaddle_parameter', - "-lpaddle_math", - '-lpaddle_utils', - "-lpaddle_proto", - "-lpaddle_cuda", - "-lpaddle_api", - self.normalize_flag(self.protolib), - self.normalize_flag(self.glog_libs), - 
self.normalize_flag(self.gflags_libs), - self.normalize_flag(self.zlib), - self.normalize_flag(self.thread), - self.normalize_flag(self.dl_libs), - self.normalize_flag(self.cblas_libs), - ] - - if self.with_python: - libs.append(self.normalize_flag(self.python_libs)) - if self.with_gpu: - libs.append(self.normalize_flag(self.curt)) - if self.with_coverage: - libs.append("-fprofile-arcs") - return " ".join(filter(lambda l: len(l) != 0, libs)) - - def normalize_flag(self, cmake_flag): - """ - CMake flag string to ld flag - :type cmake_flag: str - """ - if ";" in cmake_flag: - return " ".join(map(self.normalize_flag, cmake_flag.split(";"))) - if cmake_flag.startswith("/"): # is a path - return cmake_flag - elif cmake_flag.startswith("-l"): # normal link command - return cmake_flag - elif cmake_flag in [ - "gflags-shared", "gflags-static", "gflags_nothreads-shared", - "gflags_nothreads-static" - ]: # special for gflags - assert PaddleLDFlag.cmake_bool(self.gflags_location) - return self.gflags_location - elif len(cmake_flag) != 0: - return "".join(["-l", cmake_flag]) - else: - return "" - - @staticmethod - def cmake_bool(cmake_str): - """ - CMake bool string to bool - :param cmake_str: cmake boolean string - :type cmake_str: str - :rtype: bool - """ - if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith( - "-NOTFOUND"): - return False - else: - return True - - def c_flag(self): - if self.with_coverage: - return [ - "-fprofile-arcs", "-ftest-coverage", "-O0", "-g", - "-std=c++11" - ] - else: - return ["-std=c++11"] -except ImportError: - - class PaddleLDFlag(object): - def ldflag_str(self): - pass - - def c_flag(self): - pass diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 4f92150ec84d637c5b75cba09d7e98501a5a5f5d..93a6a99848aa13bb36c9c5c7091fbaa891fc9823 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER + layers/CudnnConvBaseLayer.h layers/CudnnConvLayer.h + layers/CudnnConvTransLayer.h layers/CudnnPoolLayer.h layers/CudnnBatchNormLayer.h) list(REMOVE_ITEM GSERVER_SOURCES + layers/CudnnConvBaseLayer.cpp layers/CudnnConvLayer.cpp + layers/CudnnConvTransLayer.cpp layers/CudnnPoolLayer.cpp layers/CudnnBatchNormLayer.cpp) compile_cu_as_cpp(layers/LstmCompute.cu) diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h index 9a2ad7567f0dc93d0a8e396fd88b2488afe9d049..40036762179ebb1495b90907f16b97e3c60c50d8 100644 --- a/paddle/gserver/dataproviders/DataProvider.h +++ b/paddle/gserver/dataproviders/DataProvider.h @@ -164,15 +164,6 @@ public: argu.value = value; data_.push_back(argu); } - /** - * @brief Append user defined data - * @param[in] ptr user defined data - */ - void appendUserDefinedPtr(UserDefinedVectorPtr ptr) { - Argument argu; - argu.udp = ptr; - data_.push_back(argu); - } /* * @brief Append argument diff --git a/paddle/gserver/layers/ConvBaseOperator.cpp b/paddle/gserver/layers/ConvBaseOperator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5c231986292d2cd26ee30ccc122142fccd5b4949 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseOperator.cpp @@ -0,0 +1,150 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ConvBaseOperator.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief ConvBaseOperator takes two inputs to perform the convolution. + * The first input is the image, and the second input is the convolution kernel. + * The data heights of the two inputs are the same. Each sample of the first input + * is convolved with the corresponding sample of the second input independently. + * + * The config file api is conv_operator. + */ + +ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu) + : Operator(config, useGpu) { + CHECK(useGpu); + CHECK_EQ(config_.input_indices_size(), 2L); + + caffeMode_ = true; + getConvParams(); + computeConvSizes(); + + // initialize all to default algorithms + fwdAlgo_ = 0; + bwdFilterAlgo_ = 0; + bwdDataAlgo_ = 0; + fwdLimitBytes_ = 0; + bwdDataLimitBytes_ = 0; + bwdFilterLimitBytes_ = 0; + workSpaceInBytes_ = 0; + workSpace_ = nullptr; + + isSelectAlgo_ = false; +} + +void ConvBaseOperator::allocConvWorkSpace() { + hl_conv_workspace(imageDesc_, + outputDesc_, + filterDesc_, + convDesc_, + &fwdAlgo_, + &fwdLimitBytes_, + &bwdDataAlgo_, + &bwdDataLimitBytes_, + &bwdFilterAlgo_, + &bwdFilterLimitBytes_); + + size_t maxWorkSpace = 0; + maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); + maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); + + if (maxWorkSpace > workSpaceInBytes_) { + if (workSpaceInBytes_ != 0) { + hl_free_mem_device(workSpace_); + } + // total amount of storage needed + workSpace_ = hl_malloc_device(maxWorkSpace); + workSpaceInBytes_ = maxWorkSpace; + } +} + +void ConvBaseOperator::computeConvSizes() { + hl_create_filter_descriptor( + &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_); + hl_create_tensor_descriptor(&imageDesc_); + hl_create_tensor_descriptor(&outputDesc_); + hl_create_convolution_descriptor(&convDesc_, + imageDesc_, + filterDesc_, + paddingY_, + padding_, + strideY_, + stride_); +} + +void ConvBaseOperator::reshapeImageDescriptors() { + hl_tensor_reshape(imageDesc_, + 1, + channels_, + imageH_, + imageW_, + channels_ * imageH_ * imageW_, + imageH_ * imageW_, + imageW_, + 1); + hl_tensor_reshape(outputDesc_, + 1, + numFilters_, + outputH_, + outputW_, + numFilters_ * outputH_ * outputW_, + outputH_ * outputW_, + outputW_, + 1); + hl_reset_convolution_descriptor(convDesc_, + imageDesc_, + filterDesc_, + paddingY_, + padding_, + strideY_, + stride_); +} + +void ConvBaseOperator::getConvParams() { + configNumFilters_ = config_.num_filters(); + const ConvConfig &conf = config_.conv_conf(); + padding_ = conf.padding(); + stride_ = conf.stride(); + filterSize_ = conf.filter_size(); + paddingY_ = conf.padding_y(); + strideY_ = conf.stride_y(); + filterSizeY_ = conf.filter_size_y(); + filterPixels_ = filterSize_ * filterSizeY_; + configChannels_ = conf.channels(); + imgSize_ = conf.img_size(); + imgSizeY_ = conf.has_img_size_y() ?
conf.img_size_y() : conf.img_size(); + imgPixels_ = imgSize_ * imgSizeY_; + CHECK_EQ(conf.groups(), 1U); + filterChannels_ = conf.filter_channels(); + outputX_ = conf.output_x(); + outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + outputs_ = outputX_ * outputX_; + + isDeconv_ = (config_.type() == "conv") ? false : true; + if (isDeconv_) { + channels_ = configNumFilters_; + numFilters_ = configChannels_; + } else { + channels_ = configChannels_; + numFilters_ = configNumFilters_; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseOperator.h b/paddle/gserver/layers/ConvBaseOperator.h new file mode 100644 index 0000000000000000000000000000000000000000..2d42169cde2a80a26edcf98bc2d728e00b075728 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseOperator.h @@ -0,0 +1,112 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include "Operator.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief ConvBaseOperator takes two inputs to perform the convolution. + * The first input is the image, and the second input is the convolution kernel. + * The data heights of the two inputs are the same. Each sample of the first input + * is convolved with the corresponding sample of the second input independently. + * + * The config file api is conv_operator. + */ + +class ConvBaseOperator : public Operator {public: + ConvBaseOperator(const OperatorConfig &config, bool useGpu); + /** + * Free workspace in device and destroy cudnn tensor descriptor. + */ + virtual ~ConvBaseOperator() { + if (workSpaceInBytes_ != 0) { + hl_free_mem_device(workSpace_); + workSpaceInBytes_ = 0; + } + + hl_destroy_tensor_descriptor(imageDesc_); + hl_destroy_tensor_descriptor(outputDesc_); + hl_destroy_filter_descriptor(filterDesc_); + hl_destroy_convolution_descriptor(convDesc_); + } + +protected: + /** + * Get convolution parameters from layer config and + * initialize member variables. + */ + void getConvParams(); + + /** + * Allocate Gpu Memory for cudnn convolution algorithms. + */ + void allocConvWorkSpace(); + + /** + * Create cudnn tensor descriptor for convolution operation. + */ + void computeConvSizes(); + + /** + * Reshape cudnn tensor descriptor. + */ + void reshapeImageDescriptors(); + + /** + * Reshape cudnn tensor descriptor. + */ + virtual void reshape(int batchSize) = 0; + + /** + * Check filter size is equal to the size calculated by parameters from + * layer config. + */ + void checkFilterSize(const MatrixPtr &filter) { + CHECK_EQ(static_cast<int>(filter->getWidth()), + filterSize_ * filterSizeY_ * channels_ * numFilters_); + } + + /// Most member variables are the same as in CudnnConvLayer, + /// so they are not documented again here.
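 + /// One exception worth noting: isDeconv_ is true when the operator type is + /// not "conv"; getConvParams() then swaps channels_ and numFilters_ + /// relative to configChannels_ and configNumFilters_.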
+ bool isDeconv_; + int imageH_, imageW_, outputH_, outputW_; + hl_tensor_descriptor imageDesc_; + hl_tensor_descriptor outputDesc_; + hl_filter_descriptor filterDesc_; + hl_convolution_descriptor convDesc_; + bool caffeMode_; + int inputOffset_, outputOffset_, weightOffset_; + int numFilters_, channels_; + + /// from parsing config + int configNumFilters_, configChannels_; + int padding_, stride_, filterSize_, imgSize_, imgSizeY_; + int paddingY_, strideY_, filterSizeY_; + int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_; + + /// The following member variables are the same as in CudnnConvLayer, + /// so they are not documented again here. + int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; + size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; + size_t workSpaceInBytes_; + void *workSpace_; + bool isSelectAlgo_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d1e932ded595c90cbe6040c330c5c8663d81e2b4 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -0,0 +1,195 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ConvBaseProjection.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +ThreadLocalD<std::vector<MemoryHandle*>> ConvBaseProjection::convMem_; + +ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config, + ParameterPtr parameter, + bool useGpu) + : Projection(config, parameter, useGpu) { + CHECK(useGpu); // only support GPU + getConvParams(); + initCudnn(); + + size_t height = filterH_ * filterW_ * channels_ / groups_; + size_t width = numFilters_; + weight_.reset(new Weight(height, width, parameter)); + weightOffset_ = height * width / groups_; +} + +void ConvBaseProjection::getConvParams() { + const ConvConfig &conf = config_.conv_conf(); + paddingH_ = conf.padding_y(); + paddingW_ = conf.padding(); + + strideH_ = conf.stride_y(); + strideW_ = conf.stride(); + + filterH_ = conf.filter_size_y(); + filterW_ = conf.filter_size(); + + configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); + configImgW_ = conf.img_size(); + + configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + configOutW_ = conf.output_x(); + + configChannels_ = conf.channels(); + configNumFilters_ = config_.num_filters(); + + isDeconv_ = (config_.type() == "conv") ? false : true; + + channels_ = (isDeconv_) ? configNumFilters_ : configChannels_; + numFilters_ = (isDeconv_) ?
configChannels_ : configNumFilters_; + + groups_ = conf.groups(); + CHECK_EQ(channels_ % groups_, 0); + CHECK_EQ(numFilters_ % groups_, 0); +} + +void ConvBaseProjection::initCudnn() { + hl_create_filter_descriptor(&filterDesc_, + channels_ / groups_, + numFilters_ / groups_, + filterH_, + filterW_); + hl_create_tensor_descriptor(&imageDesc_); + hl_create_tensor_descriptor(&outputDesc_); + hl_create_convolution_descriptor(&convDesc_, + imageDesc_, + filterDesc_, + paddingH_, + paddingW_, + strideH_, + strideW_); + + // initialize all to default algorithms + fwdAlgo_ = 0; + bwdFilterAlgo_ = 0; + bwdDataAlgo_ = 0; + fwdLimitBytes_ = 0; + bwdDataLimitBytes_ = 0; + bwdFilterLimitBytes_ = 0; + workSpaceInBytes_ = 0; + + batchNum_ = 0; + isSelectAlgo_ = false; +} + +void ConvBaseProjection::reshapeTensorDesc(int batchSize) { + // The stride between two consecutive samples in the output of ConvProjection + // may not be numFilters_ * outputH_ * outputW_ (conv) or + // channels_ * imageH_ * imageW_ (deconv) + // for example, in the case of layer ConcatenateLayer2 with two + // ConvProjection, the stride is the output_size of layer ConcatenateLayer2. + // So the calculation of nStride is different from CudnnConvLayer. + size_t nStrideImage, nStrideOutput; + if (isDeconv_) { + nStrideImage = out_->value->getStride(); + nStrideOutput = numFilters_ * outputH_ * outputW_; + } else { + nStrideImage = channels_ * imageH_ * imageW_; + nStrideOutput = out_->value->getStride(); + } + + hl_tensor_reshape(imageDesc_, + batchSize, + channels_ / groups_, + imageH_, + imageW_, + nStrideImage, + imageH_ * imageW_, + imageW_, + 1); + + hl_tensor_reshape(outputDesc_, + batchSize, + numFilters_ / groups_, + outputH_, + outputW_, + nStrideOutput, + outputH_ * outputW_, + outputW_, + 1); + + hl_reset_convolution_descriptor(convDesc_, + imageDesc_, + filterDesc_, + paddingH_, + paddingW_, + strideH_, + strideW_); +} + +void ConvBaseProjection::reshape(int batchSize) { + size_t width = calOutputSize(); + CHECK_EQ(width, out_->value->getWidth()); + CHECK_EQ(calInputSize(), in_->value->getWidth()); + + isSelectAlgo_ = (batchSize == batchNum_); + batchNum_ = batchSize; + + if (!isSelectAlgo_) { + reshapeTensorDesc(batchSize); + hl_conv_workspace(imageDesc_, + outputDesc_, + filterDesc_, + convDesc_, + &fwdAlgo_, + &fwdLimitBytes_, + &bwdDataAlgo_, + &bwdDataLimitBytes_, + &bwdFilterAlgo_, + &bwdFilterLimitBytes_); + + size_t maxWorkSpace = 0; + maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); + maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); + workSpaceInBytes_ = maxWorkSpace; + + VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_ + << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_; + } + + isSelectAlgo_ = true; +} + +void *ConvBaseProjection::getSpaceBytes(size_t size) { + std::vector<MemoryHandle*> &convMem = *convMem_; + if (convMem.empty()) { + int numDevices = hl_get_device_count(); + convMem.resize(numDevices); + } + + int devId = hl_get_device(); + MemoryHandle **localMem = &(convMem[devId]); + if (NULL == *localMem || size > (*localMem)->getAllocSize()) { + *localMem = new GpuMemoryHandle(size); + } + return (*localMem)->getBuf(); +} + +ConvBaseProjection::~ConvBaseProjection() { + hl_destroy_tensor_descriptor(imageDesc_); + hl_destroy_tensor_descriptor(outputDesc_); + hl_destroy_filter_descriptor(filterDesc_); + hl_destroy_convolution_descriptor(convDesc_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseProjection.h
b/paddle/gserver/layers/ConvBaseProjection.h new file mode 100644 index 0000000000000000000000000000000000000000..4a33aa1837dfc36dbead60deaccbc6b772fe4754 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -0,0 +1,116 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Projection.h" +#include "paddle/math/MathUtils.h" + +namespace paddle { + +/** + * @brief Base class for ConvProjection and ConvTransProjection. + */ +class ConvBaseProjection : public Projection {public: + /** + * Constructor. + */ + ConvBaseProjection(const ProjectionConfig& config, + ParameterPtr parameter, + bool useGpu); + + ~ConvBaseProjection(); + +protected: + void getConvParams(); + void initCudnn(); + + void reshapeTensorDesc(int batchSize); + void reshape(int batchSize); + + virtual size_t calOutputSize() = 0; + virtual size_t calInputSize() = 0; + + static void* getSpaceBytes(size_t size); + + /// True if it's a deconv projection layer, false if it's a ConvProjection layer. + bool isDeconv_; + /// imageH_ and imageW_ / outputH_ and outputW_ + /// are calculated from the input layer. + int imageH_, imageW_; + int outputH_, outputW_; + /// configImgH_ and configImgW_ / configOutH_ and configOutW_ + /// are obtained from the config. + int configImgH_, configImgW_; + int configOutH_, configOutW_; + /// channels_ and numFilters_ are defined in terms of convolution semantics. + int channels_, numFilters_; + /// configChannels_ and configNumFilters_ are obtained from the config. + /// For Conv they are the same as channels_ and numFilters_; + /// for ConvTrans they are opposite to channels_ and numFilters_. + int configChannels_, configNumFilters_; + int paddingH_, paddingW_; + int strideH_, strideW_; + int filterH_, filterW_; + /// One group offset of input data. + int inputOffset_; + /// One group offset of output data. + int outputOffset_; + /// One group offset of weight. + int weightOffset_; + int groups_; + + /// Cudnn tensor descriptor for input. + hl_tensor_descriptor imageDesc_; + /// Cudnn tensor descriptor for output. + hl_tensor_descriptor outputDesc_; + /// Cudnn filter descriptor. + hl_filter_descriptor filterDesc_; + /// Cudnn descriptor for a convolution operation. + hl_convolution_descriptor convDesc_; + + /// Record the algorithm for forward convolution, which is obtained by the + /// cudnn api that searches for the best suited algorithm. + int fwdAlgo_; + /// Record the algorithm for computing convolution gradient with respect to + /// filter coefficients. + int bwdFilterAlgo_; + /// Record the algorithm for computing convolution gradient with respect to + /// the input data. + int bwdDataAlgo_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// forward convolution with the specified algo. + size_t fwdLimitBytes_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// backwardData with the specified algo.
+ size_t bwdDataLimitBytes_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// backwardFilter with the specified algo. + size_t bwdFilterLimitBytes_; + /// Size of total work space. + size_t workSpaceInBytes_; + + /// Whether to call cuDNN api to choose conv algorithm. + bool isSelectAlgo_; + /// batchNum is used to record batch size. If the batch size is changed, + /// the selection algorithm will be called. + int batchNum_; + bool bias_; + + std::unique_ptr<Weight> weight_; + static ThreadLocalD<std::vector<MemoryHandle*>> convMem_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/gserver/layers/ConvOperator.cpp index f943410dee0dc2f3d356c9d7d8f61398fe2871c8..80932c8c509e3cb013c7e0051cbf4d8ccced0228 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/gserver/layers/ConvOperator.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "Operator.h" +#include "ConvOperator.h" #include "paddle/math/MathUtils.h" #include "paddle/math/Matrix.h" @@ -27,120 +27,8 @@ namespace paddle { * The config file api is conv_operator. */ -class ConvOperator : public Operator {-public: - ConvOperator(const OperatorConfig &config, bool useGpu); - /** - * Free workspace in device and destroy cudnn tensor descriptor. - */ - virtual ~ConvOperator() { - if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - workSpaceInBytes_ = 0; - } - - hl_destroy_tensor_descriptor(inputDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_filter_descriptor(filterDesc_); - hl_destroy_convolution_descriptor(convDesc_); - } - virtual void forward(); - virtual void backward(); - -private: - /** - * Get convolution parameters from layer config and - * initialize member variables. - */ - void getConvParams(); - - /** - * Allocate Gpu Memory for cudnn convolution algorithms. - */ - void allocConvWorkSpace(size_t maxWorkSpace); - - /** - * Create cudnn tensor descriptor for convolution operation. - */ - void computeConvSizes(); - - /** - * Reshape cudnn tensor descriptor. - */ - void reshapeImageDescriptors(); - - /** - * Reshape cudnn tensor descriptor. - */ - void reshape(int batchSize); - - /** - * Check filter size is equal to the size calculated by parameters from - * layer config. - */ - void checkFilterSize(const MatrixPtr &filter) { - CHECK_EQ(static_cast<int>(filter->getWidth()), - filterSize_ * filterSizeY_ * channels_ * numFilters_); - } - - /// Most of member variables are same with CudnnConvLayer. - /// There is no explanation here. - int imageH_, imageW_, outputH_, outputW_; - hl_tensor_descriptor inputDesc_; - hl_tensor_descriptor outputDesc_; - hl_filter_descriptor filterDesc_; - hl_convolution_descriptor convDesc_; - bool caffeMode_; - int inputOffset_, outputOffset_, weightOffset_; - int numFilters_; - int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_; - int paddingY_, strideY_, filterSizeY_; - int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_; - - /// Following member variables are same with CudnnConvLayer. - /// There is no explanation here.
- int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; - size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; - size_t workSpaceInBytes_; - void *workSpace_; - bool isSelectAlgo_; -}; - REGISTER_OPERATOR(conv, ConvOperator); -ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu) - : Operator(config, useGpu) { - CHECK(useGpu); - CHECK_EQ(config_.input_indices_size(), 2L); - - caffeMode_ = true; - getConvParams(); - computeConvSizes(); - - // initialize all to default algorithms - fwdAlgo_ = 0; - bwdFilterAlgo_ = 0; - bwdDataAlgo_ = 0; - fwdLimitBytes_ = 0; - bwdDataLimitBytes_ = 0; - bwdFilterLimitBytes_ = 0; - workSpaceInBytes_ = 0; - workSpace_ = nullptr; - - isSelectAlgo_ = false; -} - -void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) { - if (maxWorkSpace > workSpaceInBytes_) { - if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - } - // total amount of storage needed - workSpace_ = hl_malloc_device(maxWorkSpace); - workSpaceInBytes_ = maxWorkSpace; - } -} - void ConvOperator::reshape(int batchSize) { imageH_ = ins_[0]->getFrameHeight(); imageW_ = ins_[0]->getFrameWidth(); @@ -148,106 +36,25 @@ void ConvOperator::reshape(int batchSize) { if (imageW_ == 0) imageW_ = imgSize_; outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); - + /// Check that the outputSizes are consistent with config + CHECK_EQ(outputH_, outputY_); + CHECK_EQ(outputW_, outputX_); out_->setFrameHeight(outputH_); out_->setFrameWidth(outputW_); reshapeImageDescriptors(); - if (!isSelectAlgo_) { - hl_conv_workspace(inputDesc_, - outputDesc_, - filterDesc_, - convDesc_, - &fwdAlgo_, - &fwdLimitBytes_, - &bwdDataAlgo_, - &bwdDataLimitBytes_, - &bwdFilterAlgo_, - &bwdFilterLimitBytes_); - - size_t maxWorkSpace = 0; - maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); - maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); + inputOffset_ = channels_ * imageH_ * imageW_; + outputOffset_ = numFilters_ * outputH_ * outputW_; + weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_; - allocConvWorkSpace(maxWorkSpace); + if (!isSelectAlgo_) { + allocConvWorkSpace(); } isSelectAlgo_ = true; } -void ConvOperator::computeConvSizes() { - hl_create_filter_descriptor( - &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_); - hl_create_tensor_descriptor(&inputDesc_); - int outputX = - outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_); - int outputY = - outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_); - CHECK_EQ(outputX, outputX_); - CHECK_EQ(outputY, outputY_); - hl_create_tensor_descriptor(&outputDesc_); - hl_create_convolution_descriptor(&convDesc_, - inputDesc_, - filterDesc_, - paddingY_, - padding_, - strideY_, - stride_); -} - -void ConvOperator::reshapeImageDescriptors() { - hl_tensor_reshape(inputDesc_, - 1, - channels_, - imageH_, - imageW_, - channels_ * imageH_ * imageW_, - imageH_ * imageW_, - imageW_, - 1); - hl_tensor_reshape(outputDesc_, - 1, - numFilters_, - outputH_, - outputW_, - numFilters_ * outputH_ * outputW_, - outputH_ * outputW_, - outputW_, - 1); - hl_reset_convolution_descriptor(convDesc_, - inputDesc_, - filterDesc_, - paddingY_, - padding_, - strideY_, - stride_); - inputOffset_ = channels_ * imageH_ * imageW_; - outputOffset_ = numFilters_ * outputH_ * outputW_; - weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_; -} - -void ConvOperator::getConvParams() { - numFilters_ 
= config_.num_filters(); - const ConvConfig &conf = config_.conv_conf(); - padding_ = conf.padding(); - stride_ = conf.stride(); - filterSize_ = conf.filter_size(); - paddingY_ = conf.padding_y(); - strideY_ = conf.stride_y(); - filterSizeY_ = conf.filter_size_y(); - filterPixels_ = filterSize_ * filterSizeY_; - channels_ = conf.channels(); - imgSize_ = conf.img_size(); - imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - imgPixels_ = imgSize_ * imgSizeY_; - CHECK_EQ(conf.groups(), 1U); - filterChannels_ = conf.filter_channels(); - outputX_ = conf.output_x(); - outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - outputs_ = outputX_ * outputX_; -} - void ConvOperator::forward() { size_t batchSize = ins_[0]->value->getHeight(); reshape(batchSize); @@ -264,7 +71,7 @@ void ConvOperator::forward() { real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; real *outData = out_->value->getData() + outputOffset_ * batchId; - hl_convolution_forward(inputDesc_, + hl_convolution_forward(imageDesc_, inputData, outputDesc_, outData, @@ -287,7 +94,7 @@ void ConvOperator::backward() { if (ins_[1]->grad) { real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; - hl_convolution_backward_filter(inputDesc_, + hl_convolution_backward_filter(imageDesc_, inputData, outputDesc_, outGrad, @@ -303,7 +110,7 @@ if (NULL != preGrad) { real *inputGrad = preGrad->getData() + inputOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - hl_convolution_backward_data(inputDesc_, + hl_convolution_backward_data(imageDesc_, inputGrad, outputDesc_, outGrad, diff --git a/paddle/gserver/layers/ConvOperator.h b/paddle/gserver/layers/ConvOperator.h new file mode 100644 index 0000000000000000000000000000000000000000..0f3546c67ac174628044d5fb6e5c7bce06f37995 --- /dev/null +++ b/paddle/gserver/layers/ConvOperator.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include "ConvBaseOperator.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief ConvOperator takes two inputs to perform the convolution. + * The first input is the image, and the second input is the convolution kernel. + * The data heights of the two inputs are the same. Each sample of the first input + * is convolved with the corresponding sample of the second input independently. + * + * The config file api is conv_operator. + */ + +class ConvOperator : public ConvBaseOperator {public: + ConvOperator(const OperatorConfig &config, bool useGpu) + : ConvBaseOperator(config, useGpu) {} + /** + * Free workspace in device and destroy cudnn tensor descriptor.
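 + * (The actual cleanup happens in ~ConvBaseOperator; this destructor is + * intentionally empty.)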
+ */ + virtual ~ConvOperator() {} + void forward() override; + void backward() override; + void reshape(int batchSize) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index 0281170bc59855f6f4d2f4212523275a92d202d5..5b7ecc5560c1e7431305b34a331fe1fbc96c6b06 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -19,149 +19,32 @@ namespace paddle { REGISTER_PROJECTION(conv, ConvProjection); -ThreadLocalD<std::vector<MemoryHandle*>> ConvProjection::convMem_; - -ConvProjection::ConvProjection(const ProjectionConfig &config, - ParameterPtr parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(useGpu); // only support GPU - getConvParams(); - initCudnn(); - - size_t height = filterH_ * filterW_ * channels_ / groups_; - size_t width = numFilters_; - weight_.reset(new Weight(height, width, parameter)); - weightOffset_ = height * width / groups_; -} - -void ConvProjection::getConvParams() { - const ConvConfig &conf = config_.conv_conf(); - paddingH_ = conf.padding_y(); - paddingW_ = conf.padding(); - - strideH_ = conf.stride_y(); - strideW_ = conf.stride(); - - filterH_ = conf.filter_size_y(); - filterW_ = conf.filter_size(); - - configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - configImgW_ = conf.img_size(); - - channels_ = conf.channels(); - numFilters_ = config_.num_filters(); - - groups_ = conf.groups(); - CHECK_EQ(channels_ % groups_, 0); - CHECK_EQ(numFilters_ % groups_, 0); -} - -void ConvProjection::initCudnn() { - hl_create_filter_descriptor(&filterDesc_, - channels_ / groups_, - numFilters_ / groups_, - filterH_, - filterW_); - hl_create_tensor_descriptor(&inputDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_create_convolution_descriptor(&convDesc_, - inputDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_); - - // initialize all to default algorithms - fwdAlgo_ = 0; - bwdFilterAlgo_ = 0; - bwdDataAlgo_ = 0; - fwdLimitBytes_ = 0; - bwdDataLimitBytes_ = 0; - bwdFilterLimitBytes_ = 0; - workSpaceInBytes_ = 0; - - batchNum_ = 0; - isSelectAlgo_ = false; -} - -void ConvProjection::reshapeTensorDesc(int batchSize) { - hl_tensor_reshape(inputDesc_, - batchSize, - channels_ / groups_, - imageH_, - imageW_, - channels_ * imageH_ * imageW_, - imageH_ * imageW_, - imageW_, - 1); - hl_reset_convolution_descriptor(convDesc_, - inputDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_); - - // The stride between two consecutive images in ConvProjection may not be 1, - // for example, in the case of layer ConcatenateLayer2 with two - // ConvProjection, the stride is the output_size of layer ConcatenateLayer2. - // So the calculation of nStride is different from CudnnConvLayer. - // In fact, only "nStride = out_->value->getStride()" is ok.
- size_t nStride = numFilters_ * outputH_ * outputW_; - if (out_->value->isContiguous()) { - CHECK_EQ(nStride, out_->value->getWidth()); - } else { - nStride = out_->value->getStride(); - } - - hl_tensor_reshape(outputDesc_, - batchSize, - numFilters_ / groups_, - outputH_, - outputW_, - nStride, - outputH_ * outputW_, - outputW_, - 1); +size_t ConvProjection::calOutputSize() { + imageH_ = in_->getFrameHeight(); + imageW_ = in_->getFrameWidth(); + if (imageH_ == 0) imageH_ = configImgH_; + if (imageW_ == 0) imageW_ = configImgW_; + outputH_ = outputSize(imageH_, + filterH_, + paddingH_, + strideH_, + /* caffeMode */ true); + outputW_ = outputSize(imageW_, + filterW_, + paddingW_, + strideW_, + /* caffeMode */ true); + + const_cast<Argument*>(out_)->setFrameHeight(outputH_); + const_cast<Argument*>(out_)->setFrameWidth(outputW_); + + inputOffset_ = (configChannels_ / groups_) * imageH_ * imageW_; + outputOffset_ = (configNumFilters_ / groups_) * outputH_ * outputW_; + return outputH_ * outputW_ * configNumFilters_; } -void ConvProjection::reshape(int batchSize) { - size_t width = calOutputSize(); - CHECK_EQ(width, out_->value->getWidth()); - CHECK_EQ(static_cast<size_t>(channels_ * imageH_ * imageW_), - in_->value->getWidth()) - << "Wrong input size for convolution" - << " channels=" << channels_ << " imageH=" << imageH_ - << " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth(); - - isSelectAlgo_ = (batchSize == batchNum_); - batchNum_ = batchSize; - - if (!isSelectAlgo_) { - reshapeTensorDesc(batchSize); - hl_conv_workspace(inputDesc_, - outputDesc_, - filterDesc_, - convDesc_, - &fwdAlgo_, - &fwdLimitBytes_, - &bwdDataAlgo_, - &bwdDataLimitBytes_, - &bwdFilterAlgo_, - &bwdFilterLimitBytes_); - - size_t maxWorkSpace = 0; - maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); - maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); - workSpaceInBytes_ = maxWorkSpace; - - VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_ - << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_; - } - - isSelectAlgo_ = true; +size_t ConvProjection::calInputSize() { + return static_cast<size_t>(configChannels_ * imageH_ * imageW_); } void ConvProjection::forward() { @@ -179,7 +62,7 @@ void ConvProjection::forward() { real *inputData = in_->value->getData() + g * inputOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_; real *outData = out_->value->getData() + g * outputOffset_; - hl_convolution_forward(inputDesc_, + hl_convolution_forward(imageDesc_, inputData, outputDesc_, outData, @@ -205,7 +88,7 @@ void ConvProjection::backward(const UpdateCallback &callback) { if (weight_->getWGrad()) { real *inputData = in_->value->getData() + g * inputOffset_; real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; - hl_convolution_backward_filter(inputDesc_, + hl_convolution_backward_filter(imageDesc_, inputData, outputDesc_, outGrad, @@ -221,7 +104,7 @@ if (NULL != preGrad) { real *inputGrad = preGrad->getData() + g * inputOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_; - hl_convolution_backward_data(inputDesc_, + hl_convolution_backward_data(imageDesc_, inputGrad, outputDesc_, outGrad, @@ -237,26 +120,4 @@ void ConvProjection::backward(const UpdateCallback &callback) { weight_->getParameterPtr()->incUpdate(callback); } -void *ConvProjection::getSpaceBytes(size_t size) { - std::vector<MemoryHandle*> &convMem = *convMem_; - if (convMem.empty()) { - int numDevices = hl_get_device_count(); -
convMem.resize(numDevices); - } - - int devId = hl_get_device(); - MemoryHandle **localMem = &(convMem[devId]); - if (NULL == *localMem || size > (*localMem)->getAllocSize()) { - *localMem = new GpuMemoryHandle(size); - } - return (*localMem)->getBuf(); -} - -ConvProjection::~ConvProjection() { - hl_destroy_tensor_descriptor(inputDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_filter_descriptor(filterDesc_); - hl_destroy_convolution_descriptor(convDesc_); -} - } // namespace paddle diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/gserver/layers/ConvProjection.h index c32e5e1d3ab2f85feb6dd2fb5fbddd7482598e58..b7d7cc9a275529a02a5d8e82d28ed79cb7ce0b43 100644 --- a/paddle/gserver/layers/ConvProjection.h +++ b/paddle/gserver/layers/ConvProjection.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "Projection.h" +#include "ConvBaseProjection.h" #include "paddle/math/MathUtils.h" namespace paddle { @@ -22,109 +22,22 @@ namespace paddle { /** * @brief Convolution projection do the same calculation with CudnnConvLayer. */ -class ConvProjection : public Projection { +class ConvProjection : public ConvBaseProjection { public: /** * Constructor. */ ConvProjection(const ProjectionConfig& config, ParameterPtr parameter, - bool useGpu); + bool useGpu) + : ConvBaseProjection(config, parameter, useGpu) {} - ~ConvProjection(); + ~ConvProjection() {} virtual void forward(); virtual void backward(const UpdateCallback& callback); - -protected: - void getConvParams(); - void initCudnn(); - - void reshapeTensorDesc(int batchSize); - void reshape(int batchSize); - - size_t calOutputSize() { - imageH_ = in_->getFrameHeight(); - imageW_ = in_->getFrameWidth(); - if (imageH_ == 0) imageH_ = configImgH_; - if (imageW_ == 0) imageW_ = configImgW_; - outputH_ = outputSize(imageH_, - filterH_, - paddingH_, - strideH_, - /* caffeMode */ true); - outputW_ = outputSize(imageW_, - filterW_, - paddingW_, - strideW_, - /* caffeMode */ true); - - const_cast(out_)->setFrameHeight(outputH_); - const_cast(out_)->setFrameWidth(outputW_); - - inputOffset_ = (channels_ / groups_) * imageH_ * imageW_; - outputOffset_ = (numFilters_ / groups_) * outputH_ * outputW_; - return outputH_ * outputW_ * numFilters_; - } - - static void* getSpaceBytes(size_t size); - - /// imageH_ and imageW_ is calculated from the input layer. - int imageH_, imageW_; - /// configImgH_ and configImgW_ is obtained from config. - int configImgH_, configImgW_; - int outputH_, outputW_; - int channels_, numFilters_; - int paddingH_, paddingW_; - int strideH_, strideW_; - int filterH_, filterW_; - /// One group offset of input data. - int inputOffset_; - /// One group offset of output data. - int outputOffset_; - /// One group offset of weight. - int weightOffset_; - int groups_; - - /// Cudnn tensor descriptor for input. - hl_tensor_descriptor inputDesc_; - /// Cudnn tensor descriptor for output. - hl_tensor_descriptor outputDesc_; - /// Cudnn tensor descriptor for filter. - hl_filter_descriptor filterDesc_; - /// Cudnn tensor descriptor for a convolution operation. - hl_convolution_descriptor convDesc_; - - /// Record the algorithm for forward convolution, which is obtained by cudnn - /// api to search the best suited algorithm. - int fwdAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// filter coefficients. - int bwdFilterAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// the output. 
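As the workspace bookkeeping is deleted here and re-homed in `ConvBaseProjection`, note the sizing policy the deleted `reshape()` used: one buffer shared by all three cuDNN algorithms. A sketch of that policy (hypothetical helper name; the real per-algorithm limits come from `hl_conv_workspace`):

```cpp
#include <algorithm>
#include <cstddef>

// A single workspace is shared by the forward, backward-data and
// backward-filter algorithms, so it must hold the largest requirement.
size_t sharedConvWorkspaceBytes(size_t fwdLimitBytes,
                                size_t bwdDataLimitBytes,
                                size_t bwdFilterLimitBytes) {
  return std::max({fwdLimitBytes, bwdDataLimitBytes, bwdFilterLimitBytes});
}
```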
-  int bwdDataAlgo_;
-  /// Amount of GPU memory needed as workspace to be able to execute a
-  /// forward convolution with the specified algo.
-  size_t fwdLimitBytes_;
-  /// Amount of GPU memory needed as workspace to be able to execute a
-  /// backwardFilter with the specified algo.
-  size_t bwdDataLimitBytes_;
-  /// Amount of GPU memory needed as workspace to be able to execute a
-  /// backwardData with the specified algo.
-  size_t bwdFilterLimitBytes_;
-  /// Size of total work space.
-  size_t workSpaceInBytes_;
-
-  /// Whether to call cuDNN api to choose conv algorithm.
-  bool isSelectAlgo_;
-  /// batchNum is used to record batch size. If the batch size is changed,
-  /// the selection algorithm will be called.
-  int batchNum_;
-  bool bias_;
-
-  std::unique_ptr<Weight> weight_;
-  static ThreadLocalD<std::vector<MemoryHandle *>> convMem_;
+  virtual size_t calOutputSize();
+  virtual size_t calInputSize();
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/ConvTransOperator.cpp b/paddle/gserver/layers/ConvTransOperator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..db026337a473f7edf1a7c0db320f60ff3048eb9c
--- /dev/null
+++ b/paddle/gserver/layers/ConvTransOperator.cpp
@@ -0,0 +1,125 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "ConvTransOperator.h"
+#include "paddle/math/MathUtils.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+
+/**
+ * @brief ConvTransOperator takes two inputs to perform the transposed
+ * convolution. The first input is the image, and the second input is the
+ * convolution kernel. The heights of the data of the two inputs are the
+ * same. Each data of the first input is convolved with each data of the
+ * second input independently.
+ *
+ * The config file api is conv_operator.
+ */ + +REGISTER_OPERATOR(convt, ConvTransOperator); + +void ConvTransOperator::reshape(int batchSize) { + outputH_ = ins_[0]->getFrameHeight(); + outputW_ = ins_[0]->getFrameWidth(); + if (outputH_ == 0) outputH_ = outputY_; + if (outputW_ == 0) outputW_ = outputX_; + imageH_ = imageSize(outputH_, filterSizeY_, paddingY_, strideY_, caffeMode_); + imageW_ = imageSize(outputW_, filterSize_, padding_, stride_, caffeMode_); + /// Check that the imageSizes are consistent with config + CHECK_EQ(imageH_, imgSizeY_); + CHECK_EQ(imageW_, imgSize_); + out_->setFrameHeight(imageH_); + out_->setFrameWidth(imageW_); + + reshapeImageDescriptors(); + + inputOffset_ = numFilters_ * outputH_ * outputW_; + outputOffset_ = channels_ * imageH_ * imageW_; + weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_; + + if (!isSelectAlgo_) { + allocConvWorkSpace(); + } + + isSelectAlgo_ = true; +} + +void ConvTransOperator::forward() { + size_t batchSize = ins_[0]->value->getHeight(); + reshape(batchSize); + CHECK_EQ(ins_[1]->value->getHeight(), batchSize); + checkFilterSize(ins_[1]->value); + Matrix::resizeOrCreate( + out_->value, batchSize, imageH_ * imageW_ * channels_, false, useGpu_); + { + AsyncGpuBlock block; + for (size_t batchId = 0; batchId < batchSize; ++batchId) { + real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; + real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; + real *outData = out_->value->getData() + outputOffset_ * batchId; + hl_convolution_backward_data(imageDesc_, + outData, + outputDesc_, + inputData, + filterDesc_, + wgtData, + convDesc_, + workSpace_, + workSpaceInBytes_, + bwdDataAlgo_); + } + } +} + +void ConvTransOperator::backward() { + size_t batchSize = ins_[0]->value->getHeight(); + { + AsyncGpuBlock block; + for (size_t batchId = 0; batchId < batchSize; ++batchId) { + real *outGrad = out_->grad->getData() + outputOffset_ * batchId; + if (ins_[1]->grad) { + real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; + real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; + hl_convolution_backward_filter(imageDesc_, + outGrad, + outputDesc_, + inputData, + filterDesc_, + weightGrad, + convDesc_, + workSpace_, + workSpaceInBytes_, + bwdFilterAlgo_); + } + + MatrixPtr preGrad = ins_[0]->grad; + if (NULL != preGrad) { + real *inputGrad = preGrad->getData() + inputOffset_ * batchId; + real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; + hl_convolution_forward(imageDesc_, + outGrad, + outputDesc_, + inputGrad, + filterDesc_, + wgtData, + convDesc_, + workSpace_, + workSpaceInBytes_, + fwdAlgo_); + } + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvTransOperator.h b/paddle/gserver/layers/ConvTransOperator.h new file mode 100644 index 0000000000000000000000000000000000000000..ca08dc9aa77d59b45635c16cdd5064c5c3b5f96d --- /dev/null +++ b/paddle/gserver/layers/ConvTransOperator.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include "ConvBaseOperator.h"
+#include "paddle/math/MathUtils.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+
+/**
+ * @brief ConvTransOperator takes two inputs to perform the transposed
+ * convolution. The first input is the image, and the second input is the
+ * convolution kernel. The heights of the data of the two inputs are the
+ * same. Each data of the first input is convolved with each data of the
+ * second input independently.
+ *
+ * The config file api is conv_operator.
+ */
+
+class ConvTransOperator : public ConvBaseOperator {
+public:
+  ConvTransOperator(const OperatorConfig &config, bool useGpu)
+      : ConvBaseOperator(config, useGpu) {}
+  /**
+   * Free workspace in device and destroy cudnn tensor descriptor.
+   */
+  virtual ~ConvTransOperator() {}
+  void forward() override;
+  void backward() override;
+  void reshape(int batchSize) override;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/ConvTransProjection.cpp b/paddle/gserver/layers/ConvTransProjection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..48132a3ce4cc4b50fea6d755d84d7254d2055bec
--- /dev/null
+++ b/paddle/gserver/layers/ConvTransProjection.cpp
@@ -0,0 +1,123 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
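`ConvTransProjection::forward` in the file that follows calls `hl_convolution_backward_data`, because the forward pass of a transposed convolution is exactly the backward-data pass of an ordinary convolution. A naive 1-D illustration of that adjoint relationship (hypothetical helper; assumes stride 1, no padding, a single channel, and non-empty inputs):

```cpp
#include <vector>

// Each input element scatters into the output through the filter; this
// scatter-add is the adjoint of the gather an ordinary convolution does.
std::vector<float> convTransForward1D(const std::vector<float> &in,
                                      const std::vector<float> &filter) {
  std::vector<float> out(in.size() + filter.size() - 1, 0.f);
  for (size_t i = 0; i < in.size(); ++i) {
    for (size_t k = 0; k < filter.size(); ++k) {
      out[i + k] += in[i] * filter[k];
    }
  }
  return out;
}
```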
+
+#include "ConvTransProjection.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+REGISTER_PROJECTION(convt, ConvTransProjection);
+size_t ConvTransProjection::calOutputSize() {
+  outputH_ = in_->getFrameHeight();
+  outputW_ = in_->getFrameWidth();
+  if (outputH_ == 0) outputH_ = configOutH_;
+  if (outputW_ == 0) outputW_ = configOutW_;
+  imageH_ = imageSize(outputH_,
+                      filterH_,
+                      paddingH_,
+                      strideH_,
+                      /* caffeMode */ true);
+
+  imageW_ = imageSize(outputW_,
+                      filterW_,
+                      paddingW_,
+                      strideW_,
+                      /* caffeMode */ true);
+
+  const_cast<Argument *>(out_)->setFrameHeight(imageH_);
+  const_cast<Argument *>(out_)->setFrameWidth(imageW_);
+
+  inputOffset_ = (configChannels_ / groups_) * outputH_ * outputW_;
+  outputOffset_ = (configNumFilters_ / groups_) * imageH_ * imageW_;
+  return imageH_ * imageW_ * configNumFilters_;
+}
+
+size_t ConvTransProjection::calInputSize() {
+  return static_cast<size_t>(configChannels_ * outputH_ * outputW_);
+}
+
+void ConvTransProjection::forward() {
+  int batchSize = in_->value->getHeight();
+  reshape(batchSize);
+
+  void *workSpace = NULL;
+  if (workSpaceInBytes_ > 0) {
+    workSpace = getSpaceBytes(workSpaceInBytes_);
+  }
+
+  for (int g = 0; g < groups_; ++g) {
+    REGISTER_TIMER_INFO("CudnnConvTransFwTimer", getName().c_str());
+
+    real *inData = in_->value->getData() + g * inputOffset_;
+    real *wgtData = weight_->getW()->getData() + g * weightOffset_;
+    real *outData = out_->value->getData() + g * outputOffset_;
+    hl_convolution_backward_data(imageDesc_,
+                                 outData,
+                                 outputDesc_,
+                                 inData,
+                                 filterDesc_,
+                                 wgtData,
+                                 convDesc_,
+                                 workSpace,
+                                 bwdDataLimitBytes_,
+                                 bwdDataAlgo_);
+  }
+}
+
+void ConvTransProjection::backward(const UpdateCallback &callback) {
+  REGISTER_TIMER_INFO("CudnnConvTransBpTimer", getName().c_str());
+
+  void *workSpace = NULL;
+  if (workSpaceInBytes_ > 0) {
+    workSpace = getSpaceBytes(workSpaceInBytes_);
+  }
+
+  for (int g = 0; g < groups_; ++g) {
+    real *outGrad = out_->grad->getData() + g * outputOffset_;
+    if (weight_->getWGrad()) {
+      real *inData = in_->value->getData() + g * inputOffset_;
+      real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_;
+      hl_convolution_backward_filter(imageDesc_,
+                                     outGrad,
+                                     outputDesc_,
+                                     inData,
+                                     filterDesc_,
+                                     weightGrad,
+                                     convDesc_,
+                                     workSpace,
+                                     bwdFilterLimitBytes_,
+                                     bwdFilterAlgo_);
+    }
+
+    MatrixPtr preGrad = in_->grad;
+    if (NULL != preGrad) {
+      real *inGrad = preGrad->getData() + g * inputOffset_;
+      real *wgtData = weight_->getW()->getData() + g * weightOffset_;
+      hl_convolution_forward(imageDesc_,
+                             outGrad,
+                             outputDesc_,
+                             inGrad,
+                             filterDesc_,
+                             wgtData,
+                             convDesc_,
+                             workSpace,
+                             fwdLimitBytes_,
+                             fwdAlgo_);
+    }
+  }
+
+  weight_->getParameterPtr()->incUpdate(callback);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/ConvTransProjection.h b/paddle/gserver/layers/ConvTransProjection.h
new file mode 100644
index 0000000000000000000000000000000000000000..6508d17b2409aa0cc11cdafb306604816f010718
--- /dev/null
+++ b/paddle/gserver/layers/ConvTransProjection.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
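`ConvTransProjection::calOutputSize` above recovers `imageH_`/`imageW_` from the output size by inverting the caffe-mode rule sketched earlier; `convImageSize` below is a hypothetical stand-in for the `imageSize` helper in `paddle/math/MathUtils`:

```cpp
// Inverse of the caffe-mode output-size rule: the image size that an
// ordinary convolution would have reduced to the given output size.
//   image = (output - 1) * stride + filter - 2 * padding
int convImageSize(int outputSize, int filterSize, int padding, int stride) {
  return (outputSize - 1) * stride + filterSize - 2 * padding;
}
```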
+#pragma once
+
+#include "ConvBaseProjection.h"
+#include "paddle/math/MathUtils.h"
+
+namespace paddle {
+
+/**
+ * @brief Convolution transpose projection; it does the same calculation as
+ * the cudnn_convt layer.
+ */
+class ConvTransProjection : public ConvBaseProjection {
+public:
+  /**
+   * Constructor.
+   */
+  ConvTransProjection(const ProjectionConfig& config,
+                      ParameterPtr parameter,
+                      bool useGpu)
+      : ConvBaseProjection(config, parameter, useGpu) {}
+
+  ~ConvTransProjection() {}
+
+  virtual void forward();
+  virtual void backward(const UpdateCallback& callback);
+  virtual size_t calOutputSize();
+  virtual size_t calInputSize();
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 998b8d7d3034cb18fbab242c66656092bfc50fcb..4ae5b828707eb8412e98cbefcf3949d62e81ad1e 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -192,6 +192,59 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output,
   outputG.sumOfSquaresBp(output, *label.value);
 }
 
+//
+// class SmoothL1CostLayer
+//
+
+REGISTER_LAYER(smooth_l1, SmoothL1CostLayer);
+
+bool SmoothL1CostLayer::init(const LayerMap& layerMap,
+                             const ParameterMap& parameterMap) {
+  return CostLayer::init(layerMap, parameterMap);
+}
+
+void SmoothL1CostLayer::forwardImp(Matrix& output,
+                                   Argument& label,
+                                   Matrix& target) {
+  MatrixPtr targetCpu, outputCpu, labelCpu;
+  if (useGpu_) {
+    targetCpu =
+        Matrix::create(target.getHeight(), target.getWidth(), false, false);
+    outputCpu =
+        Matrix::create(output.getHeight(), output.getWidth(), false, false);
+    labelCpu = Matrix::create(
+        label.value->getHeight(), label.value->getWidth(), false, false);
+    targetCpu->copyFrom(target);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    targetCpu->smoothL1(*outputCpu, *(labelCpu));
+    target.copyFrom(*targetCpu);
+  } else {
+    target.smoothL1(output, *label.value);
+  }
+}
+
+void SmoothL1CostLayer::backwardImp(Matrix& output,
+                                    Argument& label,
+                                    Matrix& outputG) {
+  MatrixPtr outputGCpu, outputCpu, labelCpu;
+  if (useGpu_) {
+    outputGCpu =
+        Matrix::create(outputG.getHeight(), outputG.getWidth(), false, false);
+    outputCpu =
+        Matrix::create(output.getHeight(), output.getWidth(), false, false);
+    labelCpu = Matrix::create(
+        label.value->getHeight(), label.value->getWidth(), false, false);
+    outputGCpu->copyFrom(outputG);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
+    outputG.copyFrom(*outputGCpu);
+  } else {
+    outputG.smoothL1Bp(output, *label.value);
+  }
+}
+
 //
 // class RankingCost
 //
diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index b3045e0b31308abf2caa90cbd21f105e685ef341..569a6840f0d4432cc827219f590b821df115c7ea 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -159,6 +159,29 @@ public:
                   Matrix& outputGrad) override;
 };
 
+/**
+ * This cost layer computes the smooth L1 loss for real-valued regression
+ * tasks.
+ * \f[
+ * L = \begin{cases}
+ *       0.5 \, (output - label)^2, & \text{if } |output - label| < 1 \\
+ *       |output - label| - 0.5,    & \text{otherwise}
+ *     \end{cases}
+ * \f]
+ */
+class SmoothL1CostLayer : public CostLayer {
+public:
+  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
+
+  void backwardImp(Matrix& outputValue,
+                   Argument& label,
+                   Matrix& outputGrad) override;
+};
+
 /**
  * A cost layer for learning to rank (LTR) task. This layer contains at least
  * three inputs.
diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fbccc11032caa4878ce8dcfe7c34a261acee68b
--- /dev/null
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -0,0 +1,122 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "NormLayer.h"
+#include "paddle/math/BaseMatrix.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+
+MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data,
+                                                    size_t iter,
+                                                    size_t spatialDim) {
+  return Matrix::create(data->getData() + iter * channels_ * spatialDim,
+                        channels_,
+                        spatialDim,
+                        false,
+                        useGpu_);
+}
+
+MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
+                                                     size_t iter,
+                                                     size_t spatialDim) {
+  return Matrix::create(
+      data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
+}
+
+void CrossChannelNormLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  MatrixPtr inV = getInputValue(0);
+
+  size_t batchSize = inV->getHeight();
+  size_t dataDim = inV->getWidth();
+  CHECK_EQ(getSize(), dataDim);
+
+  reserveOutput(batchSize, dataDim);
+  MatrixPtr outV = getOutputValue();
+  size_t spatialDim = dataDim / channels_;
+
+  Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
+  Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
+  Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
+  normBuffer_->zeroMem();
+  // add a small eps to avoid dividing by zero later
+  normBuffer_->addScalar(*normBuffer_, 1e-6);
+  inV->square2(*dataBuffer_);
+  for (size_t i = 0; i < batchSize; i++) {
+    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
+    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
+    MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim);
+    MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);
+
+    // compute norm.
+    spatialBuffer_->sumCols(*dataTmp, 1, 0);
+    spatialBuffer_->sqrt2(*spatialBuffer_);
+    normTmp->copyFrom(*spatialBuffer_);
+    outVTmp->copyFrom(*inVTmp);
+    outVTmp->divRowVector(*spatialBuffer_);
+    // scale the layer.
+    outVTmp->mulColVector(*scale_->getW());
+  }
+}
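With `forward` closed above and `backward` following, the per-position computation is easier to see in scalar form. A minimal CPU sketch of the same normalization (a hypothetical helper, not the layer itself; input laid out as `[channels][spatialDim]` and assumed non-empty; the small eps keeps the division finite, mirroring the 1e-6 added to `normBuffer_`):

```cpp
#include <cmath>
#include <vector>

void crossChannelNorm(const std::vector<std::vector<float>> &in,   // [C][S]
                      const std::vector<float> &scale,             // [C]
                      std::vector<std::vector<float>> &out,        // [C][S]
                      float eps = 1e-6f) {
  const size_t C = in.size(), S = in[0].size();
  for (size_t s = 0; s < S; ++s) {
    float sq = eps;  // accumulate the sum of squares across channels
    for (size_t c = 0; c < C; ++c) sq += in[c][s] * in[c][s];
    const float norm = std::sqrt(sq);
    for (size_t c = 0; c < C; ++c) {
      out[c][s] = scale[c] * in[c][s] / norm;  // normalize, then rescale
    }
  }
}
```

The backward pass below differentiates exactly this map with respect to both the input and the per-channel scale.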
+
+void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inG = getInputGrad(0);
+  MatrixPtr inV = getInputValue(0);
+  MatrixPtr outG = getOutputGrad();
+  MatrixPtr outV = getOutputValue();
+
+  size_t batchSize = inG->getHeight();
+  size_t dataDim = inG->getWidth();
+  size_t spatialDim = dataDim / channels_;
+
+  dataBuffer_->dotMul(*outG, *outV);
+  Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
+  scaleDiff_->zeroMem();
+  for (size_t i = 0; i < batchSize; i++) {
+    MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim);
+    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
+    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
+    const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim);
+    const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);
+
+    channelBuffer_->sumRows(*dataTmp, 1, 0);
+    channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW()));
+    // store a / scale[i] in scaleDiff_ temporarily
+    scaleDiff_->add(*channelBuffer_, 1.);
+
+    sampleBuffer_->dotMul(*inVTmp, *outGTmp);
+    spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
+    // scale the grad
+    inGTmp->copyFrom(*inVTmp);
+    inGTmp->mulRowVector(*spatialBuffer_);
+    // divide by square of norm
+    spatialBuffer_->dotMul(*normTmp, *normTmp);
+    inGTmp->divRowVector(*spatialBuffer_);
+    // subtract
+    inGTmp->add(*outGTmp, -1, 1);
+    // divide by norm
+    inGTmp->divRowVector(*normTmp);
+    // scale the diff
+    inGTmp->mulColVector(*scale_->getW());
+  }
+  // update scale
+  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
+  scale_->getParameterPtr()->incUpdate(callback);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/CudnnConvLayer.cpp b/paddle/gserver/layers/CudnnConvBaseLayer.cpp
similarity index 66%
rename from paddle/gserver/layers/CudnnConvLayer.cpp
rename to paddle/gserver/layers/CudnnConvBaseLayer.cpp
index 978c2c1479c64ab2cdebaaff7394059b3d033ab6..24363bb8b09cc354c25abe512257be68566c10e1 100644
--- a/paddle/gserver/layers/CudnnConvLayer.cpp
+++ b/paddle/gserver/layers/CudnnConvBaseLayer.cpp
@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/ -#include "CudnnConvLayer.h" +#include "CudnnConvBaseLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" namespace paddle { +REGISTER_LAYER(cudnn_conv, CudnnConvBaseLayer); +REGISTER_LAYER(cudnn_convt, CudnnConvBaseLayer); -REGISTER_LAYER(cudnn_conv, CudnnConvLayer); - -bool CudnnConvLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { +bool CudnnConvBaseLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; CHECK(useGpu_) << "CudnnConvLayer only support gpu"; @@ -33,7 +33,11 @@ bool CudnnConvLayer::init(const LayerMap &layerMap, CHECK(config_.shared_biases()); for (size_t i = 0; i < inputLayers_.size(); i++) { ProjectionConfig *conf = new ProjectionConfig(); - conf->set_type("conv"); + if (isDeconv_) { + conf->set_type("convt"); + } else { + conf->set_type("conv"); + } conf->set_num_filters(numFilters_); ConvConfig *convConf = conf->mutable_conv_conf(); *convConf = *(config_.mutable_inputs(i)->mutable_conv_conf()); @@ -47,14 +51,13 @@ bool CudnnConvLayer::init(const LayerMap &layerMap, if (biases_.get() && sharedBiases_) { hl_create_tensor_descriptor(&biasDesc_); hl_create_tensor_descriptor(&outputDesc_); - hl_tensor_reshape(biasDesc_, 1, numFilters_ / groups_[0], 1, 1); - biasOffset_ = numFilters_ / groups_[0]; + hl_tensor_reshape(biasDesc_, 1, numFilters_, 1, 1); } return true; } -void CudnnConvLayer::forward(PassType passType) { +void CudnnConvBaseLayer::forward(PassType passType) { Layer::forward(passType); int batchSize = getInput(0).getBatchSize(); @@ -67,37 +70,41 @@ void CudnnConvLayer::forward(PassType passType) { if (biases_) { REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outH, outW; + if (isDeconv_) { + outH = imgSizeH_[0]; + outW = imgSizeW_[0]; + } else { + outH = outputH_[0]; + outW = outputW_[0]; + } + hl_tensor_reshape(outputDesc_, batchSize, - numFilters_ / groups_[0], - outputH_[0], - outputW_[0], - numFilters_ * outputH_[0] * outputW_[0], - outputH_[0] * outputW_[0], - outputW_[0], + numFilters_, + outH, + outW, + numFilters_ * outH * outW, + outH * outW, + outW, 1); - outputOffset_ = getOutputValue()->getWidth() / groups_[0]; - for (int g = 0; g < groups_[0]; ++g) { - real *biasData = biases_->getW()->getData() + biasOffset_ * g; - real *outData = getOutputValue()->getData() + outputOffset_ * g; - hl_convolution_forward_add_bias( - biasDesc_, biasData, outputDesc_, outData); - } + real *outData = getOutputValue()->getData(); + real *biasData = biases_->getW()->getData(); + hl_convolution_forward_add_bias(biasDesc_, biasData, outputDesc_, outData); } forwardActivation(); } -void CudnnConvLayer::backward(const UpdateCallback &callback) { +void CudnnConvBaseLayer::backward(const UpdateCallback &callback) { backwardActivation(); if (biases_ && biases_->getWGrad()) { REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str()); - for (int g = 0; g < groups_[0]; ++g) { - real *biasGrad = biases_->getWGrad()->getData() + biasOffset_ * g; - real *outGrad = getOutputGrad()->getData() + outputOffset_ * g; - hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad); - } + real *biasGrad = biases_->getWGrad()->getData(); + real *outGrad = getOutputGrad()->getData(); + hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad); + biases_->getParameterPtr()->incUpdate(callback); } @@ -106,7 +113,7 @@ void CudnnConvLayer::backward(const 
UpdateCallback &callback) {
   }
 }
 
-CudnnConvLayer::~CudnnConvLayer() {
+CudnnConvBaseLayer::~CudnnConvBaseLayer() {
   if (biases_) {
     hl_destroy_tensor_descriptor(biasDesc_);
     hl_destroy_tensor_descriptor(outputDesc_);
diff --git a/paddle/gserver/layers/CudnnConvLayer.h b/paddle/gserver/layers/CudnnConvBaseLayer.h
similarity index 86%
rename from paddle/gserver/layers/CudnnConvLayer.h
rename to paddle/gserver/layers/CudnnConvBaseLayer.h
index 919b1efc4e453219a6c2ab1a11c61ccb99404084..93a05f94c7717f9170818b9d5ce3d27a6d18cef5 100644
--- a/paddle/gserver/layers/CudnnConvLayer.h
+++ b/paddle/gserver/layers/CudnnConvBaseLayer.h
@@ -30,27 +30,24 @@ namespace paddle {
  *
  * The config file api is img_conv_layer.
  */
-class CudnnConvLayer : public ConvBaseLayer {
+class CudnnConvBaseLayer : public ConvBaseLayer {
 protected:
   std::vector<std::unique_ptr<ProjectionConfig>> projConf_;
   std::vector<std::unique_ptr<Projection>> projections_;
   hl_tensor_descriptor biasDesc_;
   hl_tensor_descriptor outputDesc_;
-  int biasOffset_;
-  int outputOffset_;
 
 public:
-  explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
+  explicit CudnnConvBaseLayer(const LayerConfig& config)
+      : ConvBaseLayer(config) {}
 
-  ~CudnnConvLayer();
+  ~CudnnConvBaseLayer();
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback) override;
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
 
-  void forward(PassType passType) override;
-  void backward(const UpdateCallback& callback) override;
-  void addBiases();
-  void bpropBiases();
 };
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp
index 3db0af2515ee9f64aa6c0b0a441e88562d9e398e..e094078bfe86e30c06e1b80ebc04c8213fe9abcf 100644
--- a/paddle/gserver/layers/NormLayer.cpp
+++ b/paddle/gserver/layers/NormLayer.cpp
@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
     return new ResponseNormLayer(config);
   } else if (norm == "cmrnorm-projection") {
     return new CMRProjectionNormLayer(config);
+  } else if (norm == "cross-channel-norm") {
+    return new CrossChannelNormLayer(config);
   } else {
     LOG(FATAL) << "Unknown norm type: " << norm;
     return nullptr;
@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }
 
+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 }  // namespace paddle
diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h
index e77faaa322570933b3ea2de877b7859857306432..7c238ac944e52c3a83c2aa5deac18de3aff6db61 100644
--- a/paddle/gserver/layers/NormLayer.h
+++ b/paddle/gserver/layers/NormLayer.h
@@ -65,4 +65,35 @@ public:
   }
 };
 
+/**
+ * This layer applies normalization across the channels of each sample to a
+ * conv layer's output, and scales the output by a group of trainable factors
+ * whose dimension equals the number of channels.
+ * - Input: One and only one input layer is accepted.
+ * - Output: The normalized data of the input data.
+ * Reference:
+ *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+ *    Cheng-Yang Fu, Alexander C. Berg.
SSD: Single Shot MultiBox Detector + */ +class CrossChannelNormLayer : public NormLayer { +public: + explicit CrossChannelNormLayer(const LayerConfig& config) + : NormLayer(config) {} + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + void forward(PassType passType); + void backward(const UpdateCallback& callback); + MatrixPtr createSampleMatrix(MatrixPtr data, size_t iter, size_t spatialDim); + MatrixPtr createSpatialMatrix(MatrixPtr data, size_t iter, size_t spatialDim); + +protected: + size_t channels_; + std::unique_ptr scale_; + MatrixPtr scaleDiff_; + MatrixPtr normBuffer_; + MatrixPtr dataBuffer_; + MatrixPtr channelBuffer_; + MatrixPtr spatialBuffer_; + MatrixPtr sampleBuffer_; +}; + } // namespace paddle diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp index bcf5e912a50fef2cec8ebdf1e0dad9efa43fba2f..331bc7672ec0d39a7317c39f1d14e8dcadea471a 100644 --- a/paddle/gserver/layers/PriorBox.cpp +++ b/paddle/gserver/layers/PriorBox.cpp @@ -20,7 +20,7 @@ namespace paddle { /** * @brief A layer for generating priorbox locations and variances. * - Input: Two and only two input layer are accepted. The input layer must be - * be a data output layer and a convolution output layer. + * be a data output layer and a convolution output layer. * - Output: The priorbox locations and variances of the input data. * Reference: * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, @@ -45,27 +45,32 @@ protected: MatrixPtr buffer_; }; +REGISTER_LAYER(priorbox, PriorBoxLayer); + bool PriorBoxLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { Layer::init(layerMap, parameterMap); auto pbConf = config_.inputs(0).priorbox_conf(); + std::vector tmp; + aspectRatio_.push_back(1.); std::copy(pbConf.min_size().begin(), pbConf.min_size().end(), std::back_inserter(minSize_)); std::copy(pbConf.max_size().begin(), pbConf.max_size().end(), std::back_inserter(maxSize_)); - std::copy(pbConf.aspect_ratio().begin(), - pbConf.aspect_ratio().end(), - std::back_inserter(aspectRatio_)); std::copy(pbConf.variance().begin(), pbConf.variance().end(), std::back_inserter(variance_)); + std::copy(pbConf.aspect_ratio().begin(), + pbConf.aspect_ratio().end(), + std::back_inserter(tmp)); // flip - int inputRatioLength = aspectRatio_.size(); - for (int index = 0; index < inputRatioLength; index++) - aspectRatio_.push_back(1 / aspectRatio_[index]); - aspectRatio_.push_back(1.); + int inputRatioLength = tmp.size(); + for (int index = 0; index < inputRatioLength; index++) { + aspectRatio_.push_back(tmp[index]); + aspectRatio_.push_back(1 / tmp[index]); + } numPriors_ = aspectRatio_.size(); if (maxSize_.size() > 0) numPriors_++; return true; @@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) { for (int w = 0; w < layerWidth; ++w) { real centerX = (w + 0.5) * stepW; real centerY = (h + 0.5) * stepH; - int minSize = 0; + real minSize = 0; for (size_t s = 0; s < minSize_.size(); s++) { // first prior. minSize = minSize_[s]; - int boxWidth = minSize; - int boxHeight = minSize; + real boxWidth = minSize; + real boxHeight = minSize; // xmin, ymin, xmax, ymax. tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; @@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) { CHECK_EQ(minSize_.size(), maxSize_.size()); // second prior. 
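As context before the second-prior loop continues below: after the `init()` change earlier in this hunk, the ratio list holds the implicit 1.0 plus each configured ratio and its reciprocal, so the number of priors per feature-map cell (the layer's `numPriors_`) follows directly. A sketch with a hypothetical helper:

```cpp
#include <cstddef>
#include <vector>

size_t numPriorsPerCell(const std::vector<float> &configuredRatios,
                        bool hasMaxSize) {
  size_t n = 1 + 2 * configuredRatios.size();  // 1.0, then r and 1/r each
  return hasMaxSize ? n + 1 : n;  // extra sqrt(min * max) prior if max sizes
}
```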
for (size_t s = 0; s < maxSize_.size(); s++) { - int maxSize = maxSize_[s]; + real maxSize = maxSize_[s]; boxWidth = boxHeight = sqrt(minSize * maxSize); tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; @@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) { MatrixPtr outV = getOutputValue(); outV->copyFrom(buffer_->data_, dim * 2); } -REGISTER_LAYER(priorbox, PriorBoxLayer); } // namespace paddle diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp index 35260ca912d5d0e00213ffb7074bd8963da265da..5807c4249620db44fed82a6bb69a77d807d9f0a0 100644 --- a/paddle/gserver/layers/SequencePoolLayer.cpp +++ b/paddle/gserver/layers/SequencePoolLayer.cpp @@ -56,17 +56,16 @@ void SequencePoolLayer::forward(PassType passType) { CHECK_EQ(newBatchSize_, starts->getSize() - 1); resetOutput(newBatchSize_, dim); - if (type_) { - CHECK(input.subSequenceStartPositions) - << "when trans_type = seq, input must hasSubseq"; - } + /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq, * thus, in this case, output_ has no sequenceStartPositions. * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this * case, we should compute the new sequenceStartPositions. */ if (type_) { - output_.degradeSequence(input, useGpu_); + CHECK(input.subSequenceStartPositions) + << "when trans_type = seq, input must hasSubseq"; + output_.degradeSequence(input); } } diff --git a/paddle/gserver/tests/test_ConvUnify.cpp b/paddle/gserver/tests/test_ConvUnify.cpp index 207fc0566fcf4a0d2e971f3c169a14a64146155b..54b72375b743fe025e0ded5fdbce5699a0b4be1a 100644 --- a/paddle/gserver/tests/test_ConvUnify.cpp +++ b/paddle/gserver/tests/test_ConvUnify.cpp @@ -34,8 +34,7 @@ DECLARE_double(checkgrad_eps); DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(prev_batch_state); -// Do one forward pass of convTrans layer and check to see if its output -// matches the given result +// Do one forward pass of ConvLayer using either exconv or cudnn_conv MatrixPtr doOneConvTest(size_t imgSize, size_t output_x, size_t stride, @@ -46,22 +45,35 @@ MatrixPtr doOneConvTest(size_t imgSize, size_t groups, MatrixPtr& inputData, real* param, - bool useGpu) { + bool useGpu, + bool isDeconv = false) { TestConfig config; config.biasSize = numfilters; + string layerType; if (useGpu) { - config.layerConfig.set_type("cudnn_conv"); + layerType = (isDeconv) ? "cudnn_convt" : "cudnn_conv"; } else { - config.layerConfig.set_type("exconv"); + layerType = (isDeconv) ? 
"exconvt" : "exconv"; } + config.layerConfig.set_type(layerType); config.layerConfig.set_num_filters(numfilters); config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); size_t weightSize = channel * filter_size * filter_size * config.layerConfig.num_filters() / groups; - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize}); + if (isDeconv) { + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", output_x * output_x * channel, weightSize}); + config.layerConfig.set_size(imgSize * imgSize * + config.layerConfig.num_filters()); + } else { + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize}); + config.layerConfig.set_size(output_x * output_x * + config.layerConfig.num_filters()); + } + LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(filter_size); @@ -72,12 +84,15 @@ MatrixPtr doOneConvTest(size_t imgSize, conv->set_stride(stride); conv->set_stride_y(stride); conv->set_groups(groups); - conv->set_filter_channels(channel / groups); conv->set_img_size(imgSize); conv->set_output_x(output_x); - config.layerConfig.set_size(conv->output_x() * conv->output_x() * - config.layerConfig.num_filters()); + if (isDeconv) { + conv->set_filter_channels(numfilters / groups); + } else { + conv->set_filter_channels(channel / groups); + } + config.layerConfig.set_name("conv"); std::vector dataLayers; @@ -105,6 +120,8 @@ MatrixPtr doOneConvTest(size_t imgSize, TEST(Layer, convParaUnified) { #ifndef PADDLE_ONLY_CPU MatrixPtr input, resultCpu, resultGpu; + + /// TEST1 for conv /// input = Matrix::create(1, 4 * 4, false, false); real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1}; @@ -121,7 +138,7 @@ TEST(Layer, convParaUnified) { /*groups*/ 1, input, param, - false); + /*useGpu*/ false); resultGpu = doOneConvTest(/* imgSize */ 4, /* output_x */ 2, @@ -133,9 +150,42 @@ TEST(Layer, convParaUnified) { /*groups*/ 1, input, param, - true); + /*useGpu*/ true); checkMatrixEqual(resultCpu, resultGpu); + /// TEST1 for deconv /// + input = Matrix::create(1, 2 * 2, false, false); + real inputDataT[] = {1, 2, 3, 4}; + input->setData(inputDataT); + + resultCpu = doOneConvTest(/* imgSize */ 4, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 3, + /*channel*/ 1, + /*numfilters*/ 2, + /*groups*/ 1, + input, + param, + /*useGpu*/ false, + /*isDeconv*/ true); + + resultGpu = doOneConvTest(/* imgSize */ 4, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 3, + /*channel*/ 1, + /*numfilters*/ 2, + /*groups*/ 1, + input, + param, + /*useGpu*/ true, + /*isDeconv*/ true); + checkMatrixEqual(resultCpu, resultGpu); + + /// TEST2 for conv /// input = Matrix::create(1, 3 * 3 * 2, false, false); real inputData2[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; @@ -153,7 +203,7 @@ TEST(Layer, convParaUnified) { /*groups*/ 1, input, param2, - false); + /*useGpu*/ false); resultGpu = doOneConvTest(/* imgSize */ 3, /* output_x */ 2, @@ -165,9 +215,10 @@ TEST(Layer, convParaUnified) { /*groups*/ 1, input, param2, - true); + /*useGpu*/ true); checkMatrixEqual(resultCpu, resultGpu); + /// TEST3 for conv /// real param3[] = {1, 2, 3, 4, 4, 3, 2, 1}; resultCpu = doOneConvTest(/* imgSize */ 3, @@ -180,7 +231,66 @@ TEST(Layer, convParaUnified) { /*groups*/ 2, input, param3, - false); + /*useGpu*/ 
false); + + resultGpu = doOneConvTest(/* imgSize */ 3, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 2, + /*channel*/ 2, + /*numfilters*/ 2, + /*groups*/ 2, + input, + param3, + /*useGpu*/ true); + checkMatrixEqual(resultCpu, resultGpu); + + /// TEST2 for deconv /// + input = Matrix::create(1, 2 * 2 * 2, false, false); + real inputData2T[] = {1, 2, 3, 4, 5, 6, 7, 8}; + input->setData(inputData2T); + + resultCpu = doOneConvTest(/* imgSize */ 3, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 2, + /*channel*/ 2, + /*numfilters*/ 2, + /*groups*/ 1, + input, + param2, + /*useGpu*/ false, + /*isDeconv*/ true); + + resultGpu = doOneConvTest(/* imgSize */ 3, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 2, + /*channel*/ 2, + /*numfilters*/ 2, + /*groups*/ 1, + input, + param2, + /*useGpu*/ true, + /*isDeconv*/ true); + checkMatrixEqual(resultCpu, resultGpu); + + /// TEST3 for deconv /// + resultCpu = doOneConvTest(/* imgSize */ 3, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 2, + /*channel*/ 2, + /*numfilters*/ 2, + /*groups*/ 2, + input, + param3, + /*useGpu*/ false, + /*isDeconv*/ true); resultGpu = doOneConvTest(/* imgSize */ 3, /* output_x */ 2, @@ -192,7 +302,8 @@ TEST(Layer, convParaUnified) { /*groups*/ 2, input, param3, - true); + /*useGpu*/ true, + /*isDeconv*/ true); checkMatrixEqual(resultCpu, resultGpu); #endif } diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index ceb69359c992128635c199e56805d3f603ca4271..0c22896d6e58f8705f4284b95d0a6e132cb8903d 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -166,15 +166,19 @@ TEST(Projection, scaling) { } } -void testProjectionConv(size_t groups) { +void testProjectionConv(size_t groups, bool isDeconv) { const int NUM_FILTERS = 18; const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Y = 4; const int CHANNELS = 3; const int IMAGE_SIZE = 16; ProjectionConfig conf; - conf.set_type("conv"); + if (isDeconv) { + conf.set_type("convt"); + } else { + conf.set_type("conv"); + } conf.set_num_filters(NUM_FILTERS); ConvConfig* conv = conf.mutable_conv_conf(); @@ -186,7 +190,11 @@ void testProjectionConv(size_t groups) { conv->set_stride(2); conv->set_stride_y(2); conv->set_groups(groups); - conv->set_filter_channels(conv->channels() / conv->groups()); + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + } conv->set_img_size(IMAGE_SIZE); int output_x = outputSize(conv->img_size(), conv->filter_size(), @@ -199,8 +207,14 @@ void testProjectionConv(size_t groups) { conv->stride_y(), /* caffeMode */ true); conv->set_output_x(output_x); - conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(output_x * output_y * NUM_FILTERS); + conv->set_output_y(output_y); + if (isDeconv) { + conf.set_input_size(output_x * output_y * CHANNELS); + conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); + } else { + conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); + } testProjectionGrad(conf, INPUT_DATA, @@ -215,8 +229,12 @@ void testProjectionConv(size_t groups) { #ifndef PADDLE_ONLY_CPU TEST(Projection, conv) { - testProjectionConv(1); - testProjectionConv(3); + /// test ConvProjection + testProjectionConv(1, false); + 
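The tests above and below configure `filter_channels` differently for conv and deconv: an ordinary convolution slices its input channels per group, while a transposed convolution slices its filter count per group. A sketch of the rule (hypothetical helper; it mirrors the `set_filter_channels` calls in this diff):

```cpp
int filterChannels(bool isDeconv, int channels, int numFilters, int groups) {
  return isDeconv ? numFilters / groups : channels / groups;
}
```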
testProjectionConv(3, false); + /// test ConvTransProjection + testProjectionConv(1, true); + testProjectionConv(3, true); } #endif @@ -385,11 +403,11 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288}); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); - conv->set_filter_size_y(3); + conv->set_filter_size_y(4); conv->set_channels(16); conv->set_padding(0); conv->set_padding_y(1); @@ -416,6 +434,9 @@ TEST(Layer, convTransLayer) { for (auto useGpu : {false, true}) { testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); } +#ifndef PADDLE_ONLY_CPU + testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); +#endif } TEST(Layer, blockExpandLayer) { @@ -1482,16 +1503,20 @@ TEST(Layer, BatchNormalizationLayer) { #endif } -TEST(Operator, conv) { +void testConvOperator(bool isDeconv) { TestConfig config; const int NUM_FILTERS = 16; const int FILTER_SIZE = 2; const int FILTER_SIZE_Y = 3; const int CHANNELS = 3; const int IMAGE_SIZE = 16; - const int IMAGE_SIZE_Y = 8; + const int IMAGE_SIZE_Y = 9; OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); - operatorConf.set_type("conv"); + if (isDeconv) { + operatorConf.set_type("convt"); + } else { + operatorConf.set_type("conv"); + } ConvConfig* conv = operatorConf.mutable_conv_conf(); operatorConf.set_num_filters(NUM_FILTERS); conv->set_filter_size(FILTER_SIZE); @@ -1502,7 +1527,6 @@ TEST(Operator, conv) { conv->set_stride(2); conv->set_stride_y(2); conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(IMAGE_SIZE); conv->set_img_size_y(IMAGE_SIZE_Y); conv->set_output_x(outputSize(conv->img_size(), @@ -1515,11 +1539,22 @@ TEST(Operator, conv) { conv->padding_y(), conv->stride_y(), /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - NUM_FILTERS); - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + config.inputDefs.push_back({INPUT_DATA, + "layer_0", + conv->output_x() * conv->output_y() * CHANNELS, + 0}); + config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + NUM_FILTERS); + } + config.inputDefs.push_back( {INPUT_DATA, "layer_1", @@ -1531,6 +1566,11 @@ TEST(Operator, conv) { testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); } +TEST(Operator, conv) { + testConvOperator(/*isDeconv*/ true); + testConvOperator(/*isDeconv*/ false); +} + TEST(Layer, FeatureMapExpandLayer) { TestConfig config; config.layerConfig.set_type("featmap_expand"); @@ -1602,6 +1642,39 @@ TEST(Layer, PadLayer) { } } +TEST(Layer, CrossChannelNormLayer) { + TestConfig config; + config.layerConfig.set_type("norm"); + config.layerConfig.set_size(100); + LayerInputConfig* input = config.layerConfig.add_inputs(); + NormConfig* norm = input->mutable_norm_conf(); + norm->set_norm_type("cross-channel-norm"); + norm->set_channels(10); + 
norm->set_size(100); + norm->set_scale(0); + norm->set_pow(0); + norm->set_blocked(0); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5); + } +} + +TEST(Layer, smooth_l1) { + TestConfig config; + config.layerConfig.set_type("smooth_l1"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index 0a0d92d1ae65f5b6020eb71fe2a6db5a3c625d9c..de48b6fac9c7d8125a552022c52353ef6bcef995 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -1453,6 +1453,24 @@ void BaseMatrixT::divRowVector(BaseMatrixT& b) { true_type() /* bAsRowVector */, false_type()); } +template +void BaseMatrixT::mulColVector(BaseMatrixT& b) { + MatrixOffset offset(0, 0, 0, 0); + int numRows = height_; + int numCols = width_; + applyBinary(binary::DotMul(), b, numRows, numCols, offset, + false_type(), true_type() /* bAsColVector */); +} + +template +void BaseMatrixT::divColVector(BaseMatrixT& b) { + MatrixOffset offset(0, 0, 0, 0); + int numRows = height_; + int numCols = width_; + applyBinary(binary::DotDiv(), b, numRows, numCols, offset, + false_type(), true_type() /* bAsColVector */); +} + template<> template int BaseMatrixT::applyRow(Agg agg, BaseMatrixT& b) { diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h index 8691c87ac3b88499a9676d59af533e0f4713dfc3..6ed48c8d88ee698689de6f7a7f470b97a094ea5b 100644 --- a/paddle/math/BaseMatrix.h +++ b/paddle/math/BaseMatrix.h @@ -545,6 +545,9 @@ public: void mulRowVector(BaseMatrixT& b); void divRowVector(BaseMatrixT& b); + void mulColVector(BaseMatrixT& b); + void divColVector(BaseMatrixT& b); + void addP2P(BaseMatrixT& b); /** diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 07450bfb0ef709840f7e8253e87c227276529a2a..9eead5b62c690b0a3310d8b68bfa3f1870be17c2 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -3590,6 +3590,55 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) { } } +void CpuMatrix::smoothL1(Matrix& output, Matrix& label) { + CHECK(output.useGpu_ == false && label.useGpu_ == false) + << "Matrix type are not equal"; + + size_t numSamples = getHeight(); + size_t dim = output.getWidth(); + CHECK_EQ(label.getHeight(), numSamples); + CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(label.getWidth(), dim); + CHECK_EQ(getWidth(), (size_t)1); + real* out = output.getData(); + real* cost = getData(); + real* lbl = label.getData(); + + for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) { + for (size_t j = 0; j < dim; ++j) { + cost[j] = std::fabs(out[j] - lbl[j]); + if (cost[j] < 1.0) + cost[j] = 0.5 * cost[j] * cost[j]; + else + cost[j] = cost[j] - 0.5; + } + } +} + +void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) { + CHECK(output.useGpu_ == false && label.useGpu_ == false) + << "Matrix type are not equal"; + + size_t numSamples = getHeight(); + size_t dim = output.getWidth(); + CHECK_EQ(label.getHeight(), numSamples); + CHECK_EQ(output.getHeight(), numSamples); + CHECK_EQ(label.getWidth(), dim); + 
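For reference while reading `smoothL1` above and the remainder of `smoothL1Bp` below, these are the per-element function and derivative they implement, written as scalar sketches (hypothetical helpers):

```cpp
#include <cmath>

// Smooth L1: quadratic near zero, linear in the tails.
float smoothL1(float diff) {  // diff = output - label
  const float a = std::fabs(diff);
  return a < 1.f ? 0.5f * a * a : a - 0.5f;
}

// Derivative w.r.t. diff: identity near zero, sign in the tails.
float smoothL1Grad(float diff) {
  return std::fabs(diff) < 1.f ? diff : (diff > 0.f ? 1.f : -1.f);
}
```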
CHECK_EQ(getWidth(), (size_t)1); + real* out = output.getData(); + real* cost = getData(); + real* lbl = label.getData(); + + // f'(x) = x if |x| < 1 + // = sign(x) otherwise + for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) { + for (size_t j = 0; j < dim; ++j) { + cost[j] = out[j] - lbl[j]; + if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0); + } + } +} + void CpuMatrix::tanh(Matrix& output) { CHECK(isContiguous()); CHECK(output.isContiguous()); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index d0ba2e93feabfcc11ac1d261bc40c9c6973a8c29..dbdb629614546b7c7b569d7473d96a06d0c5a9c7 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -783,6 +783,14 @@ public: LOG(FATAL) << "Not implemented"; } + virtual void smoothL1(Matrix& output, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + + virtual void smoothL1Bp(Matrix& outputV, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } virtual void tanhDerivative(Matrix& output) { @@ -1720,6 +1728,9 @@ public: /// gradient of sumOfSquares. void sumOfSquaresBp(Matrix& outputV, Matrix& label); + void smoothL1(Matrix& output, Matrix& label); + void smoothL1Bp(Matrix& output, Matrix& label); + void tanh(Matrix& output); void tanhDerivative(Matrix& output); diff --git a/paddle/math/tests/test_BaseMatrix.cpp b/paddle/math/tests/test_BaseMatrix.cpp index 21918b86e1ad98766ceaf09dea3020d6e8592191..22ce39701fca7b650fc03794cb0701e0987d2dae 100644 --- a/paddle/math/tests/test_BaseMatrix.cpp +++ b/paddle/math/tests/test_BaseMatrix.cpp @@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) { compare(&BaseMatrix::addRowVector); compare(&BaseMatrix::mulRowVector); compare(&BaseMatrix::divRowVector); + compare(&BaseMatrix::mulColVector); + compare(&BaseMatrix::divColVector); compare(&BaseMatrix::addP2P); compare(&BaseMatrix::invSqrt); } diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 7a343cca33f5b420be6192231ac73ca1c2da5fb9..4139f59a2c8e665daf410b5b16539ff74b77ecfe 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -123,46 +123,6 @@ static void resizeAndCopy(ICpuGpuVectorPtr& dest, } } -static void resizeAndCopy(UserDefinedVectorPtr& dest, - const UserDefinedVectorPtr& src, - bool useGpu, - hl_stream_t stream) { - if (src) { - CHECK(!useGpu) << "not implemented"; - size_t height = src->size(); - if (!dest) { - dest = std::make_shared>(height); - } else { - dest->resize(height); - } - std::copy_n(src->begin(), height, dest->begin()); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(UserDefinedVectorPtr& dest, - const UserDefinedVectorPtr& src, - int32_t startPos, - int32_t copySize, - bool useGpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT) { - if (src) { - CHECK(!useGpu) << "not implemented"; - CHECK_LE((size_t)startPos + copySize, src->size()); - - size_t height = copySize; - if (!dest) { - dest = std::make_shared>(height); - } else { - dest->resize(height); - } - std::copy_n(src->begin() + startPos, height, dest->begin()); - } else { - dest.reset(); - } -} - static void resizeAndCopy(SVectorPtr& dest, const SVectorPtr& src, bool useGpu, @@ -223,7 +183,6 @@ void Argument::resizeAndCopyFrom(const Argument& src, false /* useGpu */, stream); } - resizeAndCopy(udp, src.udp, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream); frameWidth = src.frameWidth; frameHeight = src.frameHeight; @@ -255,7 +214,6 @@ int32_t 
Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(value, src.value, startRow, copySize, useGpu, stream); resizeAndCopy(grad, src.grad, startRow, copySize, useGpu, stream); resizeAndCopy(ids, src.ids, startRow, copySize, useGpu, stream); - resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream); resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream); return copySize; } else { @@ -268,7 +226,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, resizeAndCopy(value, src.value, startRow, copyFeatureSize, useGpu, stream); resizeAndCopy(grad, src.grad, startRow, copyFeatureSize, useGpu, stream); resizeAndCopy(ids, src.ids, startRow, copyFeatureSize, useGpu, stream); - resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream); resizeAndCopy(sequenceStartPositions, src.sequenceStartPositions, startSeq, @@ -583,7 +540,7 @@ void Argument::checkSubset() const { } } -void Argument::degradeSequence(const Argument& input, bool useGpu) { +void Argument::degradeSequence(const Argument& input) { CHECK_EQ(input.hasSubseq(), 1UL); size_t numSequences = input.getNumSequences(); size_t numSubSequences = input.getNumSubSequences(); diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 9ef44be0cb3b960db1e789f3f26bb66d1fe63c81..9fd84bc4b7e0aa54d81f5d5df9e5acb3fbb70d29 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -24,8 +24,6 @@ limitations under the License. */ namespace paddle { -// vector of user defined pointers -typedef std::shared_ptr> UserDefinedVectorPtr; typedef std::shared_ptr> SVectorPtr; struct Argument { @@ -40,7 +38,6 @@ struct Argument { sequenceStartPositions(nullptr), subSequenceStartPositions(nullptr), cpuSequenceDims(nullptr), - udp(nullptr), deviceId(-1), allCount(0), valueCount(0), @@ -63,7 +60,6 @@ struct Argument { sequenceStartPositions = argument.sequenceStartPositions; subSequenceStartPositions = argument.subSequenceStartPositions; cpuSequenceDims = argument.cpuSequenceDims; - udp = argument.udp; deviceId = argument.deviceId; allCount = argument.allCount; frameHeight = argument.frameHeight; @@ -96,8 +92,6 @@ struct Argument { // dimension of sequence, stored only in CPU IVectorPtr cpuSequenceDims; - UserDefinedVectorPtr udp; // user defined pointer - int deviceId; // the GPU device id which the argument in int allCount; // the number of output layers using this argument mutable int valueCount; // waiting this member when layer do forward @@ -137,7 +131,6 @@ struct Argument { if (ids) return ids->getSize(); if (grad) return grad->getHeight(); if (in) return in->getHeight(); - if (udp) return udp->size(); if (strs) return strs->size(); return 0; } @@ -296,7 +289,7 @@ struct Argument { /* sequence has sub-sequence degrades to a sequence. */ - void degradeSequence(const Argument& input, bool useGpu); + void degradeSequence(const Argument& input); /** * @brief getValueString will return the argument's output in string. There diff --git a/paddle/py_paddle/.gitignore b/paddle/py_paddle/.gitignore index 9e8ad4bf1638a69ab7ef19badfbf867e116548d2..80d1f76fbc05627e21e334af55d63a4a534434c6 100644 --- a/paddle/py_paddle/.gitignore +++ b/paddle/py_paddle/.gitignore @@ -1 +1,2 @@ swig_paddle.py +_swig_paddle.so diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md index 8c35411fc390ef218e395c58808d644e7a35095e..7c90316ad82a6430d6c12d72e07b166b6d9d98a9 100644 --- a/paddle/scripts/docker/README.md +++ b/paddle/scripts/docker/README.md @@ -83,13 +83,18 @@ docker build -t paddle:dev . 
The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is the only one in our repo. +Users can specify an Ubuntu mirror server for faster downloading: + +```bash +docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com/mirrors.txt . +``` ### Build PaddlePaddle from Source Code Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host): ```bash -docker run -v $PWD:/paddle -e "GPU=OFF" -e "AVX=ON" -e "TEST=ON" paddle:dev +docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=OFF" -e "RUN_TEST=OFF" paddle:dev ``` This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, can build the source code with possible local changes. When it writes to `/paddle/build` in the container, it actually writes to `$PWD/build` on the host. @@ -100,6 +105,14 @@ This command mounts the source directory on the host into `/paddle` in the conta - `$PWD/build/paddle-<version>.deb` for production installation, and - `$PWD/build/Dockerfile`, which builds the production Docker image. +Users can specify the following Docker build arguments with either an "ON" or "OFF" value: +- `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries. +- `WITH_AVX`: ***Required***. Setting it to "OFF" prevents generating AVX instructions. If you don't know what AVX is, you probably want to set it to "ON". +- `WITH_TEST`: ***Optional, default OFF***. Build unit test binaries. Once you've built the unit tests, you can run them manually with the following command: + ```bash + docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coveralls" + ``` +- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. This requires `WITH_TEST=ON`, since the tests cannot run without being built first. ### Build the Production Docker Image diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh old mode 100755 new mode 100644 index c44874eede03a8b1060b15e175ad89622f925940..a0da561dfe962b7a0a0515d4104940175ebdecad --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -1,104 +1,78 @@ #!/bin/bash -function abort(){ - echo "An error occurred. Exiting..." 1>&2 - exit 1 -} - -trap 'abort' 0 set -e -mkdir -p /paddle/dist/cpu -mkdir -p /paddle/dist/gpu -mkdir -p /paddle/dist/cpu-noavx -mkdir -p /paddle/dist/gpu-noavx -# Set BASE_IMAGE and DEB_PATH according to env variables + +# Set BASE_IMAGE according to env variables if [ ${WITH_GPU} == "ON" ]; then BASE_IMAGE="nvidia/cuda:7.5-cudnn5-runtime-ubuntu14.04" # additional packages to install when building gpu images - GPU_DOCKER_PKG="python-pip" - if [ ${WITH_AVX} == "ON" ]; then - DEB_PATH="dist/gpu/" - DOCKER_SUFFIX="gpu" - else - DEB_PATH="dist/gpu-noavx/" - DOCKER_SUFFIX="gpu-noavx" - fi + GPU_DOCKER_PKG="python-pip python-dev" else BASE_IMAGE="python:2.7.13-slim" - if [ ${WITH_AVX} == "ON" ]; then - DEB_PATH="dist/cpu/" - DOCKER_SUFFIX="cpu" - else - DEB_PATH="dist/cpu-noavx/" - DOCKER_SUFFIX="noavx" - fi fi -# If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied -source tree to /paddle, and this scripts should build it into -/paddle/build.
-if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then - if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then - ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so - fi - mkdir -p /paddle/build # -p means no error if exists - cd /paddle/build - # clean local cmake and third_party cache - if [ ${DELETE_BUILD_CACHE} == 'ON' ]; then - rm -rf * && rm -rf ../third_party - fi - cmake .. \ - -DWITH_DOC=${WITH_DOC:-OFF} \ - -DWITH_GPU=${WITH_GPU:-OFF} \ - -DWITH_AVX=${WITH_AVX:-OFF} \ - -DWITH_SWIG_PY=ON \ - -DCUDNN_ROOT=/usr/ \ - -DWITH_STYLE_CHECK=OFF \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - make -j `nproc` - make install - # generate deb package for current build - # FIXME(typhoonzero): should we remove paddle/scripts/deb ? - # FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must - # install them in docker - cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" .. - mv /paddle/build/*.deb /paddle/${DEB_PATH} +DOCKERFILE_GPU_ENV="" +if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then + DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" - if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then - apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev - # Install woboq_codebrowser. - git clone https://github.com/woboq/woboq_codebrowser /woboq - cd /woboq - cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \ - -DCMAKE_BUILD_TYPE=Release \ - . - make + # for cmake to find cudnn + ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so +fi - export WOBOQ_OUT=/usr/share/nginx/html/paddle - export BUILD_DIR=/paddle/build - mkdir -p $WOBOQ_OUT - cp -rv /woboq/data $WOBOQ_OUT/../data - /woboq/generator/codebrowser_generator \ +mkdir -p /paddle/build +cd /paddle/build + +# build script will not fail if *.deb does not exist +rm *.deb 2>/dev/null || true + +cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_DOC=${WITH_DOC:-OFF} \ + -DWITH_GPU=${WITH_GPU:-OFF} \ + -DWITH_AVX=${WITH_AVX:-OFF} \ + -DWITH_SWIG_PY=ON \ + -DCUDNN_ROOT=/usr/ \ + -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ + -DON_COVERALLS=${WITH_TEST:-OFF} \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +make -j `nproc` +if [[ ${RUN_TEST:-OFF} == "ON" ]]; then + make coveralls +fi +make install + +# generate deb package for current build +# FIXME(typhoonzero): should we remove paddle/scripts/deb ? +# FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must +# install them in docker +cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" .. + +if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then + apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev + # Install woboq_codebrowser. + git clone https://github.com/woboq/woboq_codebrowser /woboq + cd /woboq + cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \ + -DCMAKE_BUILD_TYPE=Release \ + . 
+ make + + export WOBOQ_OUT=/usr/share/nginx/html/paddle + export BUILD_DIR=/paddle/build + mkdir -p $WOBOQ_OUT + cp -rv /woboq/data $WOBOQ_OUT/../data + /woboq/generator/codebrowser_generator \ -b /paddle/build \ -a \ -o $WOBOQ_OUT \ -p paddle:/paddle - /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT - cd /woboq - make clean - fi - - pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl - pip install /usr/local/opt/paddle/share/wheels/paddle*.whl - paddle version - - if [[ ${DOCKER_BUILD:-FALSE} == 'TRUE' ]]; then - # reduce docker image size - rm -rf /paddle/build - rm -rf /usr/local/opt/paddle/share/wheels/ - fi + /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT + cd /woboq + make clean fi +paddle version + # generate production docker image Dockerfile if [ ${USE_MIRROR} ]; then MIRROR_UPDATE="sed 's@http:\/\/archive.ubuntu.com\/ubuntu\/@mirror:\/\/mirrors.ubuntu.com\/mirrors.txt@' -i /etc/apt/sources.list && \\" @@ -106,39 +80,23 @@ else MIRROR_UPDATE="\\" fi -cat > /paddle/build/Dockerfile.${DOCKER_SUFFIX} <<EOF +cat > /paddle/build/Dockerfile <<EOF FROM ${BASE_IMAGE} MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> - -# ENV variables -ARG WITH_AVX -ARG WITH_DOC -ARG WITH_STYLE_CHECK - -ENV WITH_GPU=${WITH_GPU} -ENV WITH_AVX=\${WITH_AVX:-ON} -ENV WITH_DOC=\${WITH_DOC:-OFF} -ENV WITH_STYLE_CHECK=\${WITH_STYLE_CHECK:-OFF} - ENV HOME /root ENV LANG en_US.UTF-8 - # Use Fix locales to en_US.UTF-8 - RUN ${MIRROR_UPDATE} apt-get update && \ - apt-get install -y libgfortran3 ${GPU_DOCKER_PKG} && \ + apt-get install -y libgfortran3 libpython2.7 ${GPU_DOCKER_PKG} && \ apt-get clean -y && \ pip install --upgrade pip && \ - pip install -U 'protobuf==3.1.0' requests -RUN pip install numpy + pip install -U 'protobuf==3.1.0' requests numpy # Use different deb file when building different type of images -ADD \$PWD/${DEB_PATH}*.deb /usr/local/opt/paddle/deb/ -RUN dpkg --force-all -i /usr/local/opt/paddle/deb/*.deb && rm -f /usr/local/opt/paddle/deb/*.deb - -ENV PATH="/usr/local/opt/paddle/bin/:${PATH}" +ADD build/*.deb /usr/local/opt/paddle/deb/ +# run paddle version to install python packages first +RUN dpkg -i /usr/local/opt/paddle/deb/*.deb && rm -f /usr/local/opt/paddle/deb/*.deb && paddle version +${DOCKERFILE_GPU_ENV} # default command shows the paddle version and exit CMD ["paddle", "version"] EOF - -trap : 0 diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index f29d32f0d947dc7cde6112160e4f79ce8113505f..5a45df4072b9197a713bd19ee766296279bfcbc8 100644 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -94,16 +94,22 @@ else: EOF if [ $? -eq 1 ]; then # Older version installed, or not installed at all - echo "First time run paddle, need to install some python dependencies." - BASEDIR=$(dirname "$0") - pip install ${BASEDIR}/../opt/paddle/share/wheels/*-@PADDLE_VERSION@-*.whl - if [ $? -ne 0 ]; then - echo "pip install wheels failed. " - echo "Please use 'sudo paddle' at the first time you use PaddlePaddle" - echo "PaddlePaddle will install some python dependencies automatically." - exit 1 - fi - echo "Python dependencies are installed." + echo "First time run paddle, need to install some python dependencies."
+ # setuptools normalizes package version, so we need to use normalized + # package version for paddle python package + PYTHON_PADDLE_VERSION=$(python -c 'import packaging +import setuptools +print str(packaging.version.Version("@PADDLE_VERSION@")) +' 2>/dev/null) + BASEDIR=$(dirname "$0") + pip install ${BASEDIR}/../opt/paddle/share/wheels/*-${PYTHON_PADDLE_VERSION}-*.whl + if [ $? -ne 0 ]; then + echo "pip install wheels failed. " + echo "Please use 'sudo paddle' at the first time you use PaddlePaddle" + echo "PaddlePaddle will install some python dependencies automatically." + exit 1 + fi + echo "Python dependencies are installed." fi case "$1" in
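The normalization the script relies on can be reproduced on its own. A minimal sketch, not from the repo, assuming `setuptools` and its `packaging` dependency are installed; the raw strings are illustrative stand-ins for whatever `@PADDLE_VERSION@` expands to:

```python
# Sketch of the PEP 440 normalization that setuptools applies to package
# versions: separators and case in pre-release tags are canonicalized,
# which is why the wheel filename must be matched against the normalized
# string rather than the raw version value.
from packaging.version import Version

for raw in ["0.10.0rc1", "0.10.0-RC1", "0.10.0.rc1"]:
    print("%s -> %s" % (raw, Version(raw)))  # each normalizes to "0.10.0rc1"
```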
diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh index 7deb3e62e88de7e1306fcbfc5a28aa4372d678e6..f2cbc561652a3c7502de94be37d75783fc40b9c1 100755 --- a/paddle/scripts/travis/build_and_test.sh +++ b/paddle/scripts/travis/build_and_test.sh @@ -5,7 +5,7 @@ NPROC=1 export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONHOME=/opt/python/2.7.12 export PATH=/opt/python/2.7.12/bin:${PATH} -cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} +cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DWITH_COVERAGE=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} NRPOC=`nproc` make -j $NPROC make coveralls diff --git a/paddle/setup.py.in b/paddle/setup.py.in index 382d5be6ecfc26b4a524bb6a775bd1a805a34d96..0b62436a7f81682d5279c3b307ac1abf09eafffb 100644 --- a/paddle/setup.py.in +++ b/paddle/setup.py.in @@ -12,68 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -# This file is used to build paddle python binding package. -# It will be invoked by Makefile that generated by COMAKE from setuptools import setup, Extension -import numpy as np -import api.paddle_ld_flags -import platform -import os - -system = platform.system().lower() - -is_osx = (system == 'darwin') -is_win = (system == 'windows') -is_lin = (system == 'linux') - - -# The extra links will passed from COMAKE -# because generate paddle LDFLAGS is too complicated to do in setup.py -# it just read COMAKE generated LDFLAGS. -extra_comps = [] -extra_links = [] -obj = api.paddle_ld_flags.PaddleLDFlag() -extra_comps = obj.c_flag() -ldflags = obj.ldflag_str() -if ldflags is not None: - extra_links.extend(ldflags.split(" ")) - -try: - with open('.py_paddle_extra_link_flags', 'r') as f: - for line in f: - extra_links += line.split() -except: - pass - -if is_lin == True: - extra_links = ["-Xlinker", '-start-group'] + extra_links + ["-Xlinker", "-end-group"] -elif is_osx == True: - os.environ["ARCHFLAGS"] = "-arch x86_64" - extra_links = ["-Wl,-all_load"] + extra_links - -include_dirs = [np.get_include(), "../"] # include numpy and paddle. - -os.environ["CC"] = "@CMAKE_C_COMPILER@" -os.environ["CXX"] = "@CMAKE_CXX_COMPILER@" - setup(name="py_paddle", - version="@PADDLE_VERSION@", - ext_modules=[ - Extension('py_paddle._swig_paddle', # Build SWIG Extension. - ['Paddle_wrap.cxx'], - language = "c++", - include_dirs = include_dirs, - extra_link_args = extra_links, - extra_compile_args = extra_comps - ) - ], - packages=['py_paddle'], - include_dirs = include_dirs, - install_requires = [ - 'nltk>=3.2.2', - 'numpy>=1.8.0', # The numpy is required. - 'protobuf>=3.0.0' # The paddle protobuf version - ], + version="${PADDLE_VERSION}", + packages=['py_paddle'], + include_package_data=True, + package_data={'py_paddle':['*.py','_swig_paddle.so']}, + install_requires = [ + 'nltk>=3.2.2', + 'numpy>=1.8.0', # The numpy is required. + 'protobuf>=${PROTOBUF_VERSION}' # The paddle protobuf version + ], + url='http://www.paddlepaddle.org/', + license='Apache 2.0', ) diff --git a/paddle/utils/CpuId.h b/paddle/utils/CpuId.h index 0f3985cc7b2c018ede9bba9644d2d096561dccee..5fc610964d4f5b8064f16ebf1b26bbb002264ce1 100644 --- a/paddle/utils/CpuId.h +++ b/paddle/utils/CpuId.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include "Common.h" +#include "Error.h" namespace paddle { @@ -97,4 +98,37 @@ private: #define HAS_AVX512 HAS_SIMD(SIMD_AVX512) // clang-format on +/** + * Invoke checkCPUFeature() before Paddle initialization to + * check whether the target machine supports the compiled instructions. + * If not, it simply returns an error. + */ +inline Error __must_check checkCPUFeature() { + Error err; +#ifndef __AVX__ + if (HAS_AVX) { + LOG(WARNING) << "PaddlePaddle wasn't compiled to use avx instructions, " + << "but these are available on your machine and could " + << "speed up CPU computations via CMAKE .. -DWITH_AVX=ON"; + } +#else + if (!HAS_AVX) { + err = Error( + "PaddlePaddle was compiled to use avx instructions, " + "but these aren't available on your machine, please " + "disable it via CMAKE .. -DWITH_AVX=OFF"); + } +#endif // __AVX__ +#ifdef __SSE3__ + if (!HAS_SSE3) { + err = Error( + "PaddlePaddle was compiled to use sse3 instructions, " + "which is the minimum requirement of PaddlePaddle. " + "But these aren't available on your current machine."); + } +#endif // __SSE3__ + + return err; +} + } // namespace paddle diff --git a/paddle/utils/PythonUtil.cpp.in b/paddle/utils/PythonUtil.cpp.in index 66b5795e29fb9fa751ed802e87ced0a71aea4c51..a51b8f765f41f6febb86002f371b14e8797e7e4d 100644 --- a/paddle/utils/PythonUtil.cpp.in +++ b/paddle/utils/PythonUtil.cpp.in @@ -195,9 +195,14 @@ extern const char enable_virtualenv_py[]; } void initPython(int argc, char** argv) { #ifndef PADDLE_NO_PYTHON - char pyHome[] = "@PYTHON_INSTALL_DIR@"; // NOLINT - if (strlen(pyHome)) { - Py_SetPythonHome(pyHome); + std::string pyHome; +#if defined(__APPLE__) || defined(__OSX__) + pyHome = "/usr/local/Frameworks/Python.framework/Versions/2.7"; + Py_SetPythonHome(const_cast<char*>(pyHome.c_str())); +#endif + pyHome = "@PYTHON_INSTALL_DIR@"; // NOLINT + if (!pyHome.empty()) { + Py_SetPythonHome(const_cast<char*>(pyHome.c_str())); } Py_SetProgramName(argv[0]); Py_Initialize(); diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index dbab4ec43ca2fa691445131d2cb14f51721a2e4c..1f56b6b8a96602d298507452fc7182d46179de41 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -26,6 +26,7 @@ limitations under the License.
*/ #include +#include "CpuId.h" #include "CustomStackTrace.h" #include "Logging.h" #include "StringUtil.h" @@ -185,6 +186,7 @@ void initMain(int argc, char** argv) { } version::printVersion(); + checkCPUFeature().check(); runInitFunctions(); } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index e257aa568facb1555944dba7e76c5d8bce7f1c7d..77361f8bc751446d89d8a812f48d33cd3dffc665 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -686,25 +686,17 @@ class ContextProjection(Projection): @config_class -class ConvProjection(Projection): - type = 'conv' - +class ConvBaseProjection(Projection): def __init__(self, input_layer_name, num_filters=None, conv_conf=None, **xargs): - super(ConvProjection, self).__init__(input_layer_name, **xargs) + super(ConvBaseProjection, self).__init__(input_layer_name, **xargs) if num_filters is not None: self.proj_conf.num_filters = num_filters - parse_conv(conv_conf, input_layer_name, self.proj_conf.conv_conf, - num_filters) - self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \ - self.proj_conf.conv_conf.output_y * \ - num_filters - def calc_output_size(self, input_layer_config): return self.proj_conf.output_size @@ -723,6 +715,48 @@ class ConvProjection(Projection): return None +@config_class +class ConvProjection(ConvBaseProjection): + type = 'conv' + + def __init__(self, + input_layer_name, + num_filters=None, + conv_conf=None, + **xargs): + super(ConvProjection, self).__init__(input_layer_name, num_filters, + conv_conf, **xargs) + + parse_conv(conv_conf, self.input_layer_name, self.proj_conf.conv_conf, + num_filters) + self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \ + self.proj_conf.conv_conf.output_y * \ + num_filters + + +@config_class +class ConvTransProjection(ConvBaseProjection): + type = 'convt' + + def __init__(self, + input_layer_name, + num_filters=None, + conv_conf=None, + **xargs): + super(ConvTransProjection, self).__init__(input_layer_name, num_filters, + conv_conf, **xargs) + + parse_conv( + conv_conf, + self.input_layer_name, + self.proj_conf.conv_conf, + num_filters, + trans=True) + self.proj_conf.output_size = self.proj_conf.conv_conf.img_size_y * \ + self.proj_conf.conv_conf.img_size * \ + num_filters + + # Define a operator for mixed layer @config_class class Operator(Cfg): @@ -789,6 +823,36 @@ class ConvOperator(Operator): return self.operator_conf.output_size +@config_class +class ConvTransOperator(Operator): + type = 'convt' + + def __init__(self, + input_layer_names, + num_filters=None, + conv_conf=None, + **xargs): + super(ConvTransOperator, self).__init__(input_layer_names, **xargs) + if num_filters is not None: + self.operator_conf.num_filters = num_filters + + parse_conv( + conv_conf, + MakeLayerNameInSubmodel(input_layer_names[0]), + self.operator_conf.conv_conf, + num_filters, + trans=True) + self.operator_conf.output_size = \ + self.operator_conf.conv_conf.img_size * \ + self.operator_conf.conv_conf.img_size_y * \ + num_filters + + config_assert(len(input_layer_names) == 2, "Conv is binary operator") + + def calc_output_size(self, input_sizes): + return self.operator_conf.output_size + + # please refer to the comments in proto/ModelConfig.proto @config_class class Conv(Cfg): @@ -1156,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf): def parse_norm(norm, input_layer_name, norm_conf): norm_conf.norm_type = norm.norm_type - config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'], - 
"norm-type %s is not in [rnorm, 'cmrnorm-projection']" % - norm.norm_type) + config_assert( + norm.norm_type in + ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'], + "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]" + % norm.norm_type) norm_conf.channels = norm.channels norm_conf.size = norm.size norm_conf.scale = norm.scale @@ -1772,8 +1838,17 @@ class ConvTransLayerBase(LayerBase): use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - # cudnn_convt has not been implemented so use exconvt only - self.layer_type = "exconvt" + # Automatically select cudnn_type for GPU and exconvt for CPU + # if set type=exconvt, but still reserve the way user specify + # exconvt or cudnn_convt manually. + if self.layer_type == "cudnn_convt": + config_assert(use_gpu, "cudnn_convt only support GPU") + + if (use_gpu == 1 and self.layer_type != "exconvt" and + (parallel_nn == 0 or self.config.device > -1)): + self.layer_type = "cudnn_convt" + else: + self.layer_type = "exconvt" # need to specify layer in config self.config.type = self.layer_type @@ -1790,10 +1865,9 @@ class ConvTransLayerBase(LayerBase): trans=True) conv_conf = self.config.inputs[input_index].conv_conf psize = self.calc_parameter_size(conv_conf) - print("output size for %s is %d " % (name, conv_conf.output_x)) self.create_input_parameter(input_index, psize) - self.set_layer_size( - (conv_conf.img_size**2) * self.config.num_filters) + self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, + self.config.num_filters) psize = self.config.size if shared_biases: @@ -1810,6 +1884,11 @@ class ConvTransLayer(ConvTransLayerBase): layer_type = 'exconvt' +@config_layer('cudnn_convt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'cudnn_convt' + + @config_layer('norm') class NormLayer(LayerBase): def __init__(self, name, inputs, **xargs): @@ -1821,6 +1900,9 @@ class NormLayer(LayerBase): norm_conf) self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, norm_conf.channels, False) + if norm_conf.norm_type == "cross-channel-norm": + self.create_input_parameter(0, norm_conf.channels, + [norm_conf.channels, 1]) @config_layer('pool') @@ -2222,7 +2304,10 @@ def Link( # memory for recurrent layer group. # *name* and *size* are actual layer's name and size. -# will return name of the memory, +# If *name* is None, need to provide *memory_name* and need to use +# SetMemoryInput() later to specify the layer which this memory remembers. +# +# return the name of the memory, # use this name if you assign the memory as other layer's input # # boot frame of memory is zeroed by default, @@ -2234,15 +2319,18 @@ def Link( # can only be initailized by a *boot_layer* which is a sequence. 
diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py index 2f25579fcdd9793e4c165439c9934a2bccb63617..69d860d9dab9c1d90e4d6a6940d66fcb551f6eb6 100644 --- a/python/paddle/trainer_config_helpers/default_decorators.py +++ b/python/paddle/trainer_config_helpers/default_decorators.py @@ -97,13 +97,13 @@ def reset_hook(): register_parse_config_hook(reset_hook) -def wrap_name_default(name_prefix=None): +def wrap_name_default(name_prefix=None, name_param="name"): """ Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}". .. code:: python - @default_name("some_name") + @wrap_name_default("some_name") def func(name=None): print name # name will never be None. If name is not set, # name will be "some_name_%d" @@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None): """ factory = DefaultNameFactory(name_prefix) _name_factories.append(factory) - return wrap_param_default(["name"], factory) + return wrap_param_default([name_param], factory) def wrap_param_attr_default(param_names=None, default_factory=None):
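The new `name_param` argument makes the decorator fill in a parameter other than `name`. A small illustrative sketch; the decorated function below is made up, not part of the library:

```python
# Illustrative only: default the memory_name parameter instead of name.
# Generated defaults follow the library's usual "__<prefix>_<n>__" pattern.
@wrap_name_default("memory", "memory_name")
def make_memory(name=None, memory_name=None):
    return name, memory_name

print(make_memory())                 # (None, '__memory_0__')
print(make_memory(memory_name="m"))  # (None, 'm')
```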
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 7cd3ce91312b86c96e46530e45ff9427db0a0a45..8d2329292b5b8b408473c2e33fc43b2e586d89b6 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -112,6 +112,7 @@ __all__ = [ 'out_prod_layer', 'print_layer', 'priorbox_layer', + 'cross_channel_norm_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -288,6 +289,14 @@ class LayerOutput(object): """ assert False, "this method should not be invoked" + def set_input(self, input): + """ + Set the input for a memory layer. It can only be used on a memory layer. + """ + assert isinstance(input, LayerOutput) + assert self.layer_type == LayerType.MEMORY + SetMemoryInput(self.name, input.name) + ERROR_CLIPPING = 'error_clipping_threshold' DROPOUT = 'drop_rate' @@ -704,8 +713,9 @@ class MixedLayerType(LayerOutput): assert len(self.inputs) == 0 return self - def __exit__(self, *args, **kwargs): - del args, kwargs # unused parameter to suppress warning + def __exit__(self, exc_type, exc_value, tb): + if exc_value is not None: + raise exc_value assert len(self.inputs) != 0 ml = MixedLayer( name=self.name, @@ -999,6 +1009,46 @@ def priorbox_layer(input, size=size) +@wrap_name_default("cross_channel_norm") +def cross_channel_norm_layer(input, name=None, param_attr=None): + """ + Normalize a layer's output. This layer is necessary for SSD. + It applies normalization across the channels of each sample of + a conv layer's output and scales the output by a group of trainable + factors whose dimensions equal the number of channels. + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput + :param param_attr: The Parameter Attribute|list. + :type param_attr: ParameterAttribute + :return: LayerOutput + """ + assert input.num_filters is not None + Layer( + name=name, + type=LayerType.NORM_LAYER, + inputs=[ + Input( + input.name, + norm=Norm( + norm_type="cross-channel-norm", + channels=input.num_filters, + size=input.size, + scale=0, + pow=0, + blocked=0), + **param_attr.attr) + ]) + return LayerOutput( + name, + LayerType.NORM_LAYER, + parents=input, + num_filters=input.num_filters, + size=input.size) + + @wrap_name_default("seq_pooling") @wrap_bias_attr_default(has_bias=False) @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
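A short usage sketch for the new layer; the data and conv layers here are placeholders, and the parameter attribute simply pins the initial per-channel scales:

```python
# Illustrative wiring: cross_channel_norm_layer needs an input with
# num_filters set (i.e. a convolution output) and creates one trainable
# scale per channel.
img = data_layer(name='image', size=3 * 32 * 32)
conv = img_conv_layer(input=img, num_channels=3, num_filters=16,
                      filter_size=3, stride=1, padding=1,
                      act=ReluActivation())
norm = cross_channel_norm_layer(
    input=conv, param_attr=ParamAttr(initial_mean=1.0, initial_std=0.0))
```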
@@ -2036,8 +2086,9 @@ def img_conv_layer(input, :param trans: true if it is a convTransLayer, false if it is a convLayer :type trans: bool :param layer_type: specify the layer_type, default is None. If trans=True, - layer_type has to be "exconvt", otherwise layer_type - has to be either "exconv" or "cudnn_conv" + layer_type has to be "exconvt" or "cudnn_convt", + otherwise layer_type has to be either "exconv" or + "cudnn_conv" :type layer_type: String :return: LayerOutput object. :rtype: LayerOutput @@ -2077,7 +2128,7 @@ def img_conv_layer(input, if layer_type: if trans: - assert layer_type in ["exconvt"] + assert layer_type in ["exconvt", "cudnn_convt"] else: assert layer_type in ["exconv", "cudnn_conv"] lt = layer_type @@ -2759,8 +2810,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, size=a.size) +@wrap_name_default("memory", "memory_name") def memory(name, size, + memory_name=None, is_seq=False, boot_layer=None, boot_bias=None, @@ -2782,14 +2835,32 @@ def memory(name, If boot_layer is not null, the memory is just the boot_layer's output. Set :code:`is_seq` is true boot layer is sequence. - The same name layer in recurrent group will set memory on each time step. - :param name: memory's name. + .. code-block:: python + + mem = memory(size=256, name='state') + state = fc_layer(input=mem, size=256, name='state') + + If you do not want to specify the name, you can equivalently use set_input() + to specify the layer to be remembered, as follows: + + .. code-block:: python + mem = memory(size=256) + state = fc_layer(input=mem, size=256) + mem.set_input(state) + + + :param name: the name of the layer which this memory remembers. + If name is None, the user should call set_input() to specify the + name of the layer which this memory remembers. :type name: basestring :param size: size of memory. :type size: int + :param memory_name: the name of the memory. + It is ignored when name is provided. + :type memory_name: basestring :param is_seq: is sequence for boot_layer :type is_seq: bool :param boot_layer: boot layer of memory. @@ -2811,13 +2882,21 @@ def memory(name, boot_bias = ParamAttr.to_bias(boot_bias) assert boot_layer is None or isinstance(boot_layer, LayerOutput) + if name is not None: + memory_name = None - agent_name = Memory(name, size, is_seq, boot_layer.name if boot_layer is not None else None, boot_bias, boot_bias_active_type.name, boot_with_const_id) + memory_name = Memory( + name, + size, + is_sequence=is_seq, + boot_layer=boot_layer.name if boot_layer is not None else None, + boot_bias=boot_bias, + boot_bias_active_type=boot_bias_active_type.name, + boot_with_const_id=boot_with_const_id, + memory_name=memory_name) lout = LayerOutput( - name=agent_name, + name=memory_name, size=size, layer_type=LayerType.MEMORY, parents=[boot_layer] if boot_layer is not None else None) @@ -3565,7 +3644,7 @@ def __cost_input__(input, label, weight=None): ipts = [Input(input.name), Input(label.name)] parents = [input, label] if weight is not None: - assert weight.layer_type == LayerType.DATA + assert weight.size == 1 ipts.append(Input(weight.name)) parents.append(weight) return ipts, parents @@ -3679,7 +3758,8 @@ def conv_operator(img, padding=0, filter_size_y=None, stride_y=None, - padding_y=None): + padding_y=None, + trans=False): """ Different from img_conv_layer, conv_op is an Operator, which can be used in mixed_layer. It takes two inputs to perform convolution. @@ -3735,7 +3815,9 @@ def conv_operator(img, if filter.size is not None: filter.size = filter_size * filter_size_y * num_filters * num_channels - op = ConvOperator( + opCls = ConvTransOperator if trans else ConvOperator + + op = opCls( input_layer_names=[img.name, filter.name], num_filters=num_filters, conv_conf=Conv( @@ -3747,6 +3829,7 @@ def conv_operator(img, padding_y=padding_y, stride_y=stride_y, groups=1)) + op.origin = [img, filter] return op @@ -3762,7 +3845,8 @@ def conv_projection(input, stride_y=None, padding_y=None, groups=1, - param_attr=None): + param_attr=None, + trans=False): """ Different from img_conv_layer and conv_op, conv_projection is a Projection, which can be used in mixed_layer and concat_layer. It uses cudnn to implement @@ -3801,6 +3885,8 @@ def conv_projection(input, :type groups: int :param param_attr: Convolution param attribute. None means default attribute :type param_attr: ParameterAttribute + :param trans: whether it is convTrans or conv + :type trans: boolean :return: A DotMulProjection Object. :rtype: DotMulProjection """ @@ -3837,7 +3923,9 @@ def conv_projection(input, param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_smart"] = False - proj = ConvProjection( + projCls = ConvTransProjection if trans else ConvProjection + + proj = projCls( input_layer_name=input.name, num_filters=num_filters, conv_conf=Conv(
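A brief sketch of what the new `trans` switch enables in both entry points; the layer names and shapes are illustrative, and the updated projections test config further below exercises the same path:

```python
# Illustrative: with trans=True the same entry points build the
# transposed-convolution variants (ConvTransOperator/ConvTransProjection).
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m:
    m += conv_operator(img=img, filter=flt, num_filters=64,
                       num_channels=1, filter_size=3,
                       stride=2, padding=1, trans=True)
    m += conv_projection(img, filter_size=3, num_filters=64,
                         num_channels=1, stride=2, padding=1, trans=True)
```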
@@ -4946,7 +5034,12 @@ def lambda_cost(input, @wrap_name_default() @layer_support() -def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): +def cross_entropy(input, + label, + name=None, + coeff=1.0, + weight=None, + layer_attr=None): """ A loss layer for multi class entropy. @@ -4961,22 +5054,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): :type input: LayerOutput. :param name: The name of this layer. It is not necessary. :type name: None|basestring. - :param coeff: The coefficient affects the gradient in the backward. + :param coeff: The cost is multiplied by coeff. + The coefficient affects the gradient in the backward. :type coeff: float. + :param weight: The cost of each sample is multiplied by its weight. + The weight should be a layer with size=1. Note that gradient + will not be calculated for weight. + :type weight: LayerOutput :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput. """ + ipts, parents = __cost_input__(input, label, weight) Layer( name=name, type=LayerType.CROSS_ENTROPY, - inputs=[input.name, label.name], + inputs=ipts, coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) @wrap_name_default() diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py index aa4521dcd5db3f845871cfaaedb02a86bcbddc38..dc8975cb311582a621eb4a5a166ddc34348fe3e9 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/projections.py +++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py @@ -34,11 +34,31 @@ flt = data_layer(name='filter', size=3 * 3 * 1 * 64) with mixed_layer() as m7: m7 += conv_operator( img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3) + m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1) +with mixed_layer() as m8: + m8 += conv_operator( + img=img, + filter=flt, + num_filters=64, + num_channels=1, + filter_size=3, + stride=2, + padding=1, + trans=True) + m8 += conv_projection( + img, + filter_size=3, + num_filters=64, + num_channels=1, + stride=2, + padding=1, + trans=True) end = mixed_layer( input=[ full_matrix_projection(input=m5), - trans_full_matrix_projection(input=m6), full_matrix_projection(input=m7) + trans_full_matrix_projection(input=m6), + full_matrix_projection(input=m7), full_matrix_projection(input=m8) ], size=100, layer_attr=ExtraAttr( diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index 6934fd0da62f90f9bbddef9a98798bf168f7fa8e..2818389b16cca75f5030b75fc4de8c89c06c5e02 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -33,6 +33,8 @@ layers { bias_parameter_name: "___conv_0__.wbias" num_filters: 64 shared_biases: true + height: 256 + width: 256 } layers { name: "__batch_norm_0__" @@ -58,6 +60,8 @@ layers { } bias_parameter_name: "___batch_norm_0__.wbias" moving_average_fraction: 0.9 + height: 256 + width: 256 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr index 2943ab130bd7d6f3b78ea611f1c35850ccaf5e92..2afc3afef6d39ce9b8eef05948861284775d5011 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr +++ 
b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr @@ -154,13 +154,40 @@ layers { inputs { input_layer_name: "img" } + inputs { + input_layer_name: "img" + input_parameter_name: "___mixed_6__.w1" + proj_conf { + type: "conv" + name: "___mixed_6__.w1" + input_size: 1024 + output_size: 57600 + conv_conf { + filter_size: 3 + channels: 1 + stride: 1 + padding: 0 + groups: 1 + filter_channels: 1 + output_x: 30 + img_size: 32 + caffe_mode: true + filter_size_y: 3 + padding_y: 0 + stride_y: 1 + output_y: 30 + img_size_y: 32 + } + num_filters: 64 + } + } inputs { input_layer_name: "filter" } operator_confs { type: "conv" input_indices: 0 - input_indices: 1 + input_indices: 2 input_sizes: 1024 input_sizes: 576 output_size: 57600 @@ -186,38 +213,112 @@ layers { layers { name: "__mixed_7__" type: "mixed" + size: 254016 + active_type: "" + inputs { + input_layer_name: "img" + } + inputs { + input_layer_name: "img" + input_parameter_name: "___mixed_7__.w1" + proj_conf { + type: "convt" + name: "___mixed_7__.w1" + input_size: 1024 + output_size: 254016 + conv_conf { + filter_size: 3 + channels: 1 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 64 + output_x: 32 + img_size: 63 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 32 + img_size_y: 63 + } + num_filters: 64 + } + } + inputs { + input_layer_name: "filter" + } + operator_confs { + type: "convt" + input_indices: 0 + input_indices: 2 + input_sizes: 1024 + input_sizes: 576 + output_size: 254016 + conv_conf { + filter_size: 3 + channels: 1 + stride: 2 + padding: 1 + groups: 1 + filter_channels: 64 + output_x: 32 + img_size: 63 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 2 + output_y: 32 + img_size_y: 63 + } + num_filters: 64 + } +} +layers { + name: "__mixed_8__" + type: "mixed" size: 100 active_type: "" inputs { input_layer_name: "__mixed_4__" - input_parameter_name: "___mixed_7__.w0" + input_parameter_name: "___mixed_8__.w0" proj_conf { type: "fc" - name: "___mixed_7__.w0" + name: "___mixed_8__.w0" input_size: 300 output_size: 100 } } inputs { input_layer_name: "__mixed_5__" - input_parameter_name: "___mixed_7__.w1" + input_parameter_name: "___mixed_8__.w1" proj_conf { type: "trans_fc" - name: "___mixed_7__.w1" + name: "___mixed_8__.w1" input_size: 100 output_size: 100 } } inputs { input_layer_name: "__mixed_6__" - input_parameter_name: "___mixed_7__.w2" + input_parameter_name: "___mixed_8__.w2" proj_conf { type: "fc" - name: "___mixed_7__.w2" + name: "___mixed_8__.w2" input_size: 57600 output_size: 100 } } + inputs { + input_layer_name: "__mixed_7__" + input_parameter_name: "___mixed_8__.w3" + proj_conf { + type: "fc" + name: "___mixed_8__.w3" + input_size: 254016 + output_size: 100 + } + } drop_rate: 0.5 } parameters { @@ -281,7 +382,23 @@ parameters { initial_smart: true } parameters { - name: "___mixed_7__.w0" + name: "___mixed_6__.w1" + size: 576 + initial_mean: 0.0 + initial_std: 0.471404520791 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___mixed_7__.w1" + size: 576 + initial_mean: 0.0 + initial_std: 0.471404520791 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___mixed_8__.w0" size: 30000 initial_mean: 0.0 initial_std: 0.057735026919 @@ -291,7 +408,7 @@ parameters { initial_smart: true } parameters { - name: "___mixed_7__.w1" + name: "___mixed_8__.w1" size: 10000 initial_mean: 0.0 initial_std: 0.1 @@ -301,7 +418,7 @@ parameters { initial_smart: true } parameters { - name: "___mixed_7__.w2" + name: 
"___mixed_8__.w2" size: 5760000 initial_mean: 0.0 initial_std: 0.00416666666667 @@ -310,10 +427,20 @@ parameters { initial_strategy: 0 initial_smart: true } +parameters { + name: "___mixed_8__.w3" + size: 25401600 + initial_mean: 0.0 + initial_std: 0.00198412698413 + dims: 254016 + dims: 100 + initial_strategy: 0 + initial_smart: true +} input_layer_names: "test" input_layer_names: "img" input_layer_names: "filter" -output_layer_names: "__mixed_7__" +output_layer_names: "__mixed_8__" sub_models { name: "root" layer_names: "test" @@ -328,10 +455,11 @@ sub_models { layer_names: "filter" layer_names: "__mixed_6__" layer_names: "__mixed_7__" + layer_names: "__mixed_8__" input_layer_names: "test" input_layer_names: "img" input_layer_names: "filter" - output_layer_names: "__mixed_7__" + output_layer_names: "__mixed_8__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr index 3e9d28416ed5066461e960f0a9f085e057c28346..a0fb729e062bdf6fd7d2a7c2ae364d1a2b32811d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr @@ -331,6 +331,54 @@ layers { } trans_type: "non-seq" } +layers { + name: "__recurrent_group_3__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_3__" + type: "scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "__memory_6__@__recurrent_group_3__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "__fc_layer_0__@__recurrent_group_3__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_3__" + input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0" + } + inputs { + input_layer_name: "__memory_6__@__recurrent_group_3__" + input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1" + } + bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias" +} +layers { + name: "__fc_layer_0__" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_4__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__fc_layer_0__" + } + trans_type: "non-seq" +} parameters { name: "___mixed_0__.w0" size: 40000 @@ -481,6 +529,36 @@ parameters { initial_strategy: 0 initial_smart: false } +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.1 + dims: 100 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106781187 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__@__recurrent_group_3__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} input_layer_names: "seq_input" input_layer_names: "sub_seq_input" output_layer_names: "__last_seq_0__" @@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__" output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_3__" +output_layer_names: "__last_seq_4__" sub_models { name: "root" layer_names: "seq_input" @@ -510,6 +589,9 @@ sub_models { layer_names: "__gru_group_0___recurrent_group" layer_names: 
"__gru_group_0__" layer_names: "__last_seq_3__" + layer_names: "__recurrent_group_3__" + layer_names: "__fc_layer_0__" + layer_names: "__last_seq_4__" input_layer_names: "seq_input" input_layer_names: "sub_seq_input" output_layer_names: "__last_seq_0__" @@ -517,6 +599,7 @@ sub_models { output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_3__" + output_layer_names: "__last_seq_4__" is_recurrent_layer_group: false } sub_models { @@ -647,4 +730,28 @@ sub_models { } target_inlinkid: -1 } +sub_models { + name: "__recurrent_group_3__" + layer_names: "seq_input@__recurrent_group_3__" + layer_names: "__memory_6__@__recurrent_group_3__" + layer_names: "__fc_layer_0__@__recurrent_group_3__" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__fc_layer_0__@__recurrent_group_3__" + link_name: "__memory_6__@__recurrent_group_3__" + is_sequence: false + } + in_links { + layer_name: "seq_input" + link_name: "seq_input@__recurrent_group_3__" + has_subseq: false + } + out_links { + layer_name: "__fc_layer_0__@__recurrent_group_3__" + link_name: "__fc_layer_0__" + has_subseq: false + } + target_inlinkid: -1 +} diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py index 60b4849d69d497109ef5af3257e212df233a2d0b..91010759e4847f087eb4e05ad98ae794a2129365 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py @@ -16,6 +16,16 @@ def generate_rnn_simple(name): return rnn_simple +def generate_rnn_simple_no_name(): + def rnn_simple(s): + m = memory(name=None, size=200) + fc = fc_layer(input=[s, m], size=200) + m.set_input(fc) + return fc + + return rnn_simple + + with mixed_layer() as lstm_param: # test lstm unit, rnn group lstm_param += full_matrix_projection(input=seq, size=100 * 4) @@ -33,4 +43,6 @@ outputs( last_seq(input=lstmemory_group( input=lstm_param, size=100)), last_seq(input=gru_group( - input=gru_param, size=100))) + input=gru_param, size=100)), + last_seq(input=recurrent_group( + step=generate_rnn_simple_no_name(), input=seq)), ) diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index d9f7a830ee60a331b55a1e218923e690103e1c5b..3a8b98b8f045b0eb58be69649486cbd0a571f118 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -20,7 +20,7 @@ TODO(yuyang18): Complete the comments. 
import cPickle import itertools import numpy -import paddle.v2.dataset.common +from common import download import tarfile __all__ = ['train100', 'test100', 'train10', 'test10'] @@ -55,23 +55,23 @@ def reader_creator(filename, sub_name): def train100(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'train') + download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') def test100(): - return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'test') + return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') def train10(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch') + download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') def test10(): return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch') + download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') + + +def fetch(): + download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + download(CIFAR100_URL, 'cifar', CIFAR100_MD5) diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 3021b68ddb02ecaa874e21681796c0912ad4cc06..7021a6da05dec6be216534112c2df2586e73390f 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -17,6 +17,8 @@ import hashlib import os import shutil import sys +import importlib +import paddle.v2.dataset __all__ = ['DATA_HOME', 'download', 'md5file'] @@ -69,3 +71,13 @@ def dict_add(a_dict, ele): a_dict[ele] += 1 else: a_dict[ele] = 1 + + +def fetch_all(): + for module_name in filter(lambda x: not x.startswith("__"), + dir(paddle.v2.dataset)): + if "fetch" in dir( + importlib.import_module("paddle.v2.dataset.%s" % module_name)): + getattr( + importlib.import_module("paddle.v2.dataset.%s" % module_name), + "fetch")()
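A usage sketch for the new pre-fetch hooks; files land wherever `DATA_HOME` resolves to on the machine:

```python
# Illustrative: pre-download every dataset that defines a fetch() hook
# in one call, e.g. to warm the cache before running offline.
import paddle.v2.dataset.common

paddle.v2.dataset.common.fetch_all()
```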
paddle.v2.dataset.common.download(URL, "imikolov", MD5) diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 16f2fcb99de4cb1971a7375a97b5daa209ee95ef..48a39b5493a8004d6eb034498a797af9c662bd19 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -106,3 +106,10 @@ def test(): TEST_IMAGE_MD5), paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5), 100) + + +def fetch(): + paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) + paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) + paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index 25fd8227da2f219d75c6b830e65627ecf35be453..e148ddeca0370cd76128a31ce3a4d488e9737d98 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -30,6 +30,9 @@ __all__ = [ age_table = [1, 18, 25, 35, 45, 50, 56] +URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip' +MD5 = 'c4d9eecfca2ab87c1945afe126590906' + class MovieInfo(object): def __init__(self, index, categories, title): @@ -77,10 +80,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = download( - url='http://files.grouplens.org/datasets/movielens/ml-1m.zip', - module_name='movielens', - md5sum='c4d9eecfca2ab87c1945afe126590906') + fn = download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -205,5 +205,9 @@ def unittest(): print train_count, test_count +def fetch(): + download(URL, "movielens", MD5) + + if __name__ == '__main__': unittest() diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 71689fd61b6b14a7b5072caff4e2fd48a7f74072..0eeb6d5affd8c280fb74edc82cf24bf418ca8ef9 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -125,3 +125,7 @@ def test(): """ data_set = load_sentiment_data() return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) + + +def fetch(): + nltk.download('movie_reviews', download_dir=common.DATA_HOME) diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 27f454b137e3a40febd19cf085e2f4034cc16b24..dab8620441c966b19d8218025f8d8fa5b40d1c2c 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -89,3 +89,7 @@ def test(): yield d[:-1], d[-1:] return reader + + +def fetch(): + download(URL, 'uci_housing', MD5) diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index c686870a497668517d1c78c11c616ad8a71a2980..ee63a93f5ad918b5bbc949ae6ba29082b3f6abd5 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -16,7 +16,7 @@ wmt14 dataset """ import tarfile -import paddle.v2.dataset.common +from paddle.v2.dataset.common import download __all__ = ['train', 'test', 'build_dict'] @@ -95,11 +95,13 @@ def reader_creator(tar_file, file_name, dict_size): def train(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'train/train', dict_size) + download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) def test(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'test/test', dict_size) + 
download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) + + +def fetch(): + download(URL_TRAIN, 'wmt14', MD5_TRAIN) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 5ccd3d6913e1755a37b4da7c4f182147b880d3cb..89cc928dd7f624612ba717b4e5c2d6c2de7f8bed 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -22,7 +22,9 @@ import paddle.v2.networks as networks pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) label = layer.data(name='label', type=data_type.integer_value(10)) -weight = layer.data(name='weight', type=data_type.dense_vector(10)) +weight = layer.data(name='weight', type=data_type.dense_vector(1)) +combine_weight = layer.data( + name='weight_combine', type=data_type.dense_vector(10)) score = layer.data(name='score', type=data_type.dense_vector(1)) hidden = layer.fc(input=pixel, @@ -81,7 +83,8 @@ class AggregateLayerTest(unittest.TestCase): class MathLayerTest(unittest.TestCase): def test_math_layer(self): addto = layer.addto(input=[pixel, pixel]) - linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10) + linear_comb = layer.linear_comb( + weights=combine_weight, vectors=hidden, size=10) interpolation = layer.interpolation( input=[hidden, hidden], weight=score) bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)