diff --git a/.gitignore b/.gitignore index c84b2fc8c79d6e2c9c83e2b830ab176295846fd0..9622ab78e0e0556ec2b4cc974fee93ff680d54d2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ cmake-build-* python/paddle/v2/framework/core.so CMakeFiles cmake_install.cmake - +paddle/.timestamp +python/paddlepaddle.egg-info/ diff --git a/.travis.yml b/.travis.yml index 8c8c6699d3d9abddd65a3a224c2bceedc7d88348..b4b83fcdbc84ce0fb0c91c816ebc3c964acfa590 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,8 +37,8 @@ before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow + - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109372cb014741d63032fa6a470e74042..c75b83e50cf9cef8290c37f88b38cdc3d77df39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) +set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) @@ -36,8 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." 
OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include_directories("${PROJ_ROOT}") -include_directories("${PROJ_ROOT}/paddle/cuda/include") +include_directories("${PADDLE_SOURCE_DIR}") +include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories(${Boost_INCLUDE_DIRS}) @@ -144,7 +144,7 @@ if(WITH_GPU) endif(WITH_GPU) if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) endif() if(USE_NNPACK) @@ -164,10 +164,12 @@ if(WITH_GOLANG) add_subdirectory(go) endif(WITH_GOLANG) +set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) endif() + if(WITH_DOC) add_subdirectory(doc) endif() diff --git a/Dockerfile b/Dockerfile index 8ac123bf9c0f24b47b741611f3b80213c61b82e9..41b6729124228cec16be35d9b26da8042824b0b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,13 +64,28 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile + pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] +# TODO(qijun) The template library Eigen doesn't work well with GCC 5 +# coming with the default Docker image, so we switch to use GCC 4.8 +# by default. And I will check Eigen library later. 
+ +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ + # Install woboq_codebrowser to /woboq RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ (cd /woboq \ diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 2ac098954647d37e26ac2499e0675dae39910edc..209f9078a637ac581d90212a48216eb388c477ed 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -129,7 +129,7 @@ if(WITH_GOLANG) add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND touch ${CMAKE_BINARY_DIR}/glide - DEPENDS ${PROJ_ROOT}/go/glide.lock + DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" ) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 5184f0815faac005b3dff1015395235f4e19d65b..8d5d533126c9b7fa84c725d614cf3486126d0284 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME) if(SOURCES_LIST) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" + COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" "--filter=${STYLE_FILTER}" ${SOURCES_LIST} COMMENT "cpplint: Checking source code style" diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e26d8d9df386e65137aa83cc60a43bfeabf7a4a6..b27eb71550b68b5c27e47bf067ae0df329bbd628 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -9,10 +9,12 @@ function(CheckCompilerCXX11Flag) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") endif() - # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. - # Use Debug mode instead for now. - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + if(NOT ANDROID) + # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. + # Use Debug mode instead for now. 
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + endif() endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf603f2f264b4658f63ac0eec438f12b1..d2aab938d4636b1583062e27b73cb30f5d56b7b0 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/cmake/package.cmake b/cmake/package.cmake index ff49a2d08e8f6004320acfce266339aa301eb9c4..79e02147f3f7cc19c1bf45d8a1d208a9a32416ff 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack include (CMakePackageConfigHelpers) diff --git a/cmake/util.cmake b/cmake/util.cmake index 4a27623b7ffc0b389680baee52db440c78442f46..0da4969d310368ab27b0ed65237813c07d6e59f0 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -141,8 +141,8 @@ endmacro() function(create_resources res_file output_file) add_custom_command( OUTPUT ${output_file} - COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file} - DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py) + COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} + DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index ac1583a24c828629c46cb9cf4e965f8da2273732..cde650128a068faf32f4abfff5cdfdeb656d8577 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -4,7 +4,7 @@ set(tmp_version "HEAD") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} - WORKING_DIRECTORY ${PROJ_ROOT} + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md new file mode 100644 index 0000000000000000000000000000000000000000..1f4d4ec16f7c395005e610751d95c10f5f3adf52 --- /dev/null +++ b/doc/design/auto_gradient_check.md @@ -0,0 +1,146 @@ +## Auto Gradient Checker Design + +## Background +- The result of an operator's forward computation is easy to check, because the forward computation has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: + - 1. you need to derive the correct backpropagation formula from the forward computation. + - 2. you need to implement it correctly in C++. + - 3. it is difficult to prepare test data.
+ +- Auto gradient checking computes a numeric gradient using only the forward Operator and uses it as a reference for the backward Operator's result. It has several advantages: + - 1. the numeric gradient checker only needs the forward operator. + - 2. the user only needs to prepare the input data for the forward Operator. + +## Mathematical Theory +The following two documents from Stanford give a detailed explanation of how to compute the numeric gradient and why it is useful. In short, the gradient with respect to one element is approximated by the central difference (f(x + delta) - f(x - delta)) / (2 * delta). + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) + + +## Numeric Gradient Implementation +### Python Interface +```python +def get_numeric_gradient(op, + input_values, + output_name, + input_to_check, + delta=0.005, + local_scope=None): + """ + Get the numeric gradient for an operator's input. + + :param op: C++ operator instance, can be a network. + :param input_values: The input variables. Should be a dictionary whose key is + the variable name and whose value is a numpy array. + :param output_name: The final output variable name. + :param input_to_check: The input variable to compute the gradient for. + :param delta: The perturbation value for the numeric gradient method. The + smaller delta is, the more accurate the result will be. But if delta is + too small, numerical stability problems may occur. + :param local_scope: The local scope used for get_numeric_gradient. + :return: The gradient array in numpy format. + """ +``` + +### Explanation + +- Why is `output_name` needed? + - An Operator may have multiple Outputs, and an independent gradient can be computed from each Output. So the user must specify which output to use for the calculation. + +- Why is `input_to_check` needed? + - An operator may have multiple inputs. The gradient Op can calculate the gradients of these Inputs at the same time, but the numeric gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need the gradients of multiple inputs, you can call `get_numeric_gradient` multiple times. + + +### Core Algorithm Implementation + + +```python + # we only compute the gradient of one element at a time. + # we use a for loop to compute the gradient of every element. + for i in xrange(tensor_size): + # get one input element by its index i. + origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + # subtract delta from it, run op and get the sum of the result tensor. + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + # restore the old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. + return gradient_flat.reshape(tensor_to_check.get_dims()) +```
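+
+To make the central-difference loop above concrete, here is a minimal, self-contained sketch of the same check against plain numpy. It is only an illustration: `forward` and `numeric_gradient` are hypothetical helper names, not part of Paddle's test API.
+
+```python
+import numpy
+
+
+def forward(x):
+    # a toy "operator": f(x) = sum(x ** 2), whose analytic gradient is 2 * x.
+    return numpy.sum(x ** 2)
+
+
+def numeric_gradient(f, x, delta=0.005):
+    flat_x = x.flatten()  # a copy of x; reshaped views of it are fed to f
+    grad = numpy.zeros_like(flat_x)
+    for i in range(flat_x.size):
+        origin = flat_x[i]
+        flat_x[i] = origin + delta
+        y_pos = f(flat_x.reshape(x.shape))
+        flat_x[i] = origin - delta
+        y_neg = f(flat_x.reshape(x.shape))
+        flat_x[i] = origin  # restore the old value
+        grad[i] = (y_pos - y_neg) / delta / 2
+    return grad.reshape(x.shape)
+
+
+x = numpy.random.random((2, 3)).astype("float32")
+numeric = numeric_gradient(forward, x)
+analytic = 2 * x
+assert numpy.max(numpy.abs(numeric - analytic)) < 1e-2
+```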
+ +## Auto Gradient Checker Framework + +Each Operator kernel has three kinds of gradient: + +- 1. Numeric Gradient +- 2. CPU Operator Gradient +- 3. GPU Operator Gradient (if supported) + +The numeric gradient only relies on the forward Operator, so we use the numeric gradient as the reference value. + +- 1. calculate the numeric gradient. +- 2. calculate the CPU kernel gradient with the backward Operator and compare it with the numeric gradient. +- 3. calculate the GPU kernel gradient with the backward Operator and compare it with the numeric gradient (if GPU is supported). + +#### Python Interface + +```python + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: the forward operator, used to create the backward_op. + :param input_vars: numpy values of the input variables. The following + computation will use these variables. + :param inputs_to_check: the input var names whose gradients should be checked. + :param output_name: the name of the final output variable. + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when creating the backward ops. + :param only_cpu: only compute and check the gradient on the CPU kernel. + :return: + """ +``` + +### How to check whether two numpy arrays are close enough +If `abs_numeric_grad` is nearly zero, use the absolute error for `numeric_grad` instead of the relative error: + +```python +numeric_grad = ... +operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + +abs_numeric_grad = numpy.abs(numeric_grad) +# if abs_numeric_grad is nearly zero, then use abs error for numeric_grad, not relative +# error. +abs_numeric_grad[abs_numeric_grad < 1e-3] = 1 + +diff_mat = numpy.abs(abs_numeric_grad - operator_grad) / abs_numeric_grad +max_diff = numpy.max(diff_mat) +``` + + +#### Notes: +1. The input data for the auto gradient checker should be reasonable, to avoid numerical stability problems. + + +#### Refs: + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 02b96bb413156786db6dc77696c5640b97c10aa4..84e33177740ca1652efc09c8081c2519b4366906 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -74,13 +74,13 @@ PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以 .. code-block:: bash - docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash + docker run -it --rm -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /bin/bash 或者，可以以后台进程方式运行容器： .. code-block:: bash - docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev + docker run -d -p 2202:22 -p 8888:8888 -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /usr/sbin/sshd -D 然后用密码 :code:`root` SSH进入容器: diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 673948dfe7928240817b552141ec9bc2f8a672b7..41b35b5b233abd737db07aaeb6c6dd4bf6d42b08 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,7 +13,7 @@ # serve to show the default. import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here.
-templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b6b50b7dcd5647b50a13703160489323ed90a1b4..5822c2481dd61da2084b0de76f6f65aa4e32e033 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,7 +13,7 @@ # serve to show the default. import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. 
htmlhelp_basename = project + 'doc' diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index 572a61e4ccaa9ef3d03a60d916e80eab907c6d88..8d9c6b9b20f515ed0865df8cf46b6dfc2d8ffa34 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -17,12 +17,10 @@ def main(): # network config x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param( - name='w', learning_rate=1e-3), + param_attr=paddle.attr.Param(name='w'), size=1, act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param( - name='b', learning_rate=1e-3)) + bias_attr=paddle.attr.Param(name='b')) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) cost = paddle.layer.mse_cost(input=y_predict, label=y) diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 7a1e8b8b26ac6330c3799b7dfeb4447e171fe0f1..d7b3d2bdec1687425df804c0d56d568241f9e8b0 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES}) add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) -INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) +INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle) -FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) +FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) @@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle ${START_END} ) -add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle COMMAND ${CMAKE_COMMAND} -E touch .timestamp - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle DEPENDS _swig_paddle ) # TODO(yuyang18) : make wheel name calculated by cmake -add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) +add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so) if(WITH_TESTING) IF(NOT PY_PIP_FOUND) diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 5934cb898b5f6adc74c237b1733a7459d8437a28..8cd73b348c507386cd88e907f7b431ef25e793aa 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( config->m->getConfig(), pserverSpec, useEtcd)); return updater; #else - throw UnsupportError(); + throw UnsupportError("not compiled with WITH_GOLANG"); #endif } diff --git a/paddle/capi/Arguments.cpp b/paddle/capi/Arguments.cpp index 8b81ec69e60399af86f055d2258276ac06e0b13a..1ec403077e7ea0bc8299e6266167b50ed81c3b08 100644 --- a/paddle/capi/Arguments.cpp +++ b/paddle/capi/Arguments.cpp @@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args, return kPD_NO_ERROR; } +paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth) { + if (args == nullptr) return 
kPD_NULLPTR; + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + a->args[ID].setFrameHeight(frameHeight); + a->args[ID].setFrameWidth(frameWidth); + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, uint64_t ID, uint32_t nestedLevel, diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index d71ea26a5d1aff130d974541532fda3b09bf6fe5..7c32524a00b11573a037968cf10d6488ea6c5644 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -111,6 +111,20 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids); +/** + * @brief paddle_arguments_set_frame_shape Set the fram size of one argument + * in array, which index is `ID`. + * @param [in] args arguments array + * @param [in] ID array index + * @param [in] frameHeight maximum height of input images + * @param [in] frameWidth maximum width of input images + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth); + /** * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one * argument in array, which index is `ID`. diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h index a78522e4a7c3cb34b341b7f4c89b53d32b72f114..e32f2f9836f63ba10ef5be447a4c41514e079219 100644 --- a/paddle/capi/examples/model_inference/common/common.h +++ b/paddle/capi/examples/model_inference/common/common.h @@ -3,18 +3,21 @@ #include #include -#define CHECK(stmt) \ - do { \ - paddle_error __err__ = stmt; \ - if (__err__ != kPD_NO_ERROR) { \ - fprintf(stderr, "Invoke paddle error %d \n" #stmt, __err__); \ - exit(__err__); \ - } \ +#define CHECK(stmt) \ + do { \ + paddle_error __err__ = stmt; \ + if (__err__ != kPD_NO_ERROR) { \ + fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \ + exit(__err__); \ + } \ } while (0) void* read_config(const char* filename, long* size) { FILE* file = fopen(filename, "r"); - if (file == NULL) return NULL; + if (file == NULL) { + fprintf(stderr, "Open %s error\n", filename); + return NULL; + } fseek(file, 0L, SEEK_END); *size = ftell(file); fseek(file, 0L, SEEK_SET); diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index 00f76e0152366834eafc22df710cf3d6c7b8471f..b3287552db87d25edbf6e7f3d5e68121df49e9d6 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size) { + if (mergedModel == nullptr) return kPD_NULLPTR; + std::istringstream is(std::string(static_cast(mergedModel), size)); + int64_t modelConfigSize = 0; + is.read((char*)(&modelConfigSize), sizeof(modelConfigSize)); + std::string modelConfigProtobuf; + modelConfigProtobuf.resize(modelConfigSize); + is.read(&modelConfigProtobuf[0], modelConfigSize); + paddle::TrainerConfig config; + if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) { + return kPD_PROTOBUF_ERROR; + } + auto ptr = new paddle::capi::CGradientMachine(); + ptr->machine.reset(paddle::GradientMachine::create( + config.model_config(), CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); + std::vector& parameters = ptr->machine->getParameters(); + for 
(auto& para : parameters) { + para->load(is); + } + + *machine = ptr; + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { delete cast(machine); return kPD_NO_ERROR; diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index d7e2dd9bf8037ed474971624d4518160604abe4d..c613ade5b24efbbf52f21c7ee86dd3189981c5ef 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine; PD_API paddle_error paddle_gradient_machine_create_for_inference( paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); +/** + * @brief Create a gradient machine used for model inference, using config with + * parameters which is generated by `paddle merge_model`. + * @param [out] machine that used for model inference. + * @param [in] mergedModel + * @param [in] size + * @return paddle_error + */ +PD_API paddle_error +paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size); + /** * @brief Load parameter from disk. * @param machine Gradient Machine. diff --git a/paddle/capi/tests/CMakeLists.txt b/paddle/capi/tests/CMakeLists.txt index d73f6b7733950bd472a46afb21694aac943fc909..8208808b94f54f2ddaf4d426a65b8db562b36aca 100644 --- a/paddle/capi/tests/CMakeLists.txt +++ b/paddle/capi/tests/CMakeLists.txt @@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC ${PADDLE_CAPI_INC_PATH}) target_link_libraries(capi_test_gradientMachine paddle_capi) add_test(NAME capi_test_gradientMachine - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/capi/tests) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6601918c90b967a659bceb8535fa0b53844b6f43..03985260241689a099ae9ebc136bd04831a44167 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,7 +7,7 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor) +cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) cc_test(variable_test SRCS variable_test.cc) @@ -15,26 +15,27 @@ cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) cc_test(scope_test SRCS scope_test.cc DEPS scope) -proto_library(attribute_proto SRCS attribute.proto) -proto_library(op_proto SRCS op_proto.proto DEPS attribute_proto) -proto_library(op_desc SRCS op_desc.proto DEPS attribute_proto) -cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) -cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) +proto_library(framework_proto SRCS framework.proto) -cc_library(attribute SRCS attribute.cc DEPS op_desc op_proto) +cc_library(attribute SRCS attribute.cc DEPS framework_proto) -cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope attribute) +cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope 
attribute) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) -cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator) -cc_library(op_registry SRCS op_registry.cc DEPS op_desc grad_op_builder) +cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) +cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) -py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.proto) +py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) +add_custom_command(TARGET framework_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto + COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/ + COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) @@ -43,12 +44,16 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op sgd_op add_op + mul_op + rowwise_add_op + sigmoid_op + softmax_op mean_op cross_entropy_op recurrent_op uniform_random_op + gaussian_random_op fill_zeros_like_op) endif(WITH_PYTHON) diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc index 4c5790693b7e48396e945d09f4fdc72b86aa5978..9eb07acdff1d00dd926f1cee9c24f9f151006d7e 100644 --- a/paddle/framework/attribute.cc +++ b/paddle/framework/attribute.cc @@ -44,7 +44,7 @@ AttrType AttrTypeID>() { return STRINGS; } -Attribute GetAttrValue(const AttrDesc& attr_desc) { +Attribute GetAttrValue(const OpDesc::Attr& attr_desc) { switch (attr_desc.type()) { case paddle::framework::AttrType::INT: { return attr_desc.i(); diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 3a5820e9c60539e3c771df5da4e82f6c1cae688f..08b47cabd4c2225c50022bd35734dcc2663324d6 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -14,16 +14,15 @@ limitations under the License. */ #pragma once -#include #include #include #include #include #include -#include "paddle/framework/attribute.pb.h" -#include "paddle/framework/op_desc.pb.h" +#include "paddle/framework/framework.pb.h" #include "paddle/platform/enforce.h" +#include "paddle/platform/variant.h" namespace paddle { namespace framework { @@ -37,7 +36,7 @@ typedef std::unordered_map AttributeMap; template AttrType AttrTypeID(); -Attribute GetAttrValue(const AttrDesc& attr_desc); +Attribute GetAttrValue(const OpDesc::Attr& attr_desc); // check whether a value(attribute) fit a certain limit template diff --git a/paddle/framework/attribute.proto b/paddle/framework/attribute.proto deleted file mode 100644 index 13ae312c10e934566384b8bd0f41dacd6c01fc2f..0000000000000000000000000000000000000000 --- a/paddle/framework/attribute.proto +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -syntax = "proto2"; -package paddle.framework; - -// Attribute Type for paddle's Op. -// Op contains many attributes. Each type of attributes could be different. -// The AttrType will be shared between AttrDesc and AttrProto. -enum AttrType { - INT = 0; - FLOAT = 1; - STRING = 2; - INTS = 3; - FLOATS = 4; - STRINGS = 5; -} \ No newline at end of file diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 47983110fa618e89d455a311af2112fc0ff2b9ae..315bdde76d3ffe57b656aa69688def6d274f592c 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -21,15 +21,24 @@ namespace paddle { namespace framework { -static bool AllInSet(const std::vector& names, - const std::string& suffix, - const std::unordered_set& set) { +template +static void ForEachVarName(Map& names, T callback) { for (auto& name : names) { - if (set.find(name + suffix) == set.end()) { - return false; + for (auto& n : name.second) { + if (callback(n)) return; } } - return true; +} + +static bool AllInSet( + const std::map>& names, + const std::string& suffix, const std::unordered_set& set) { + bool all_in_set = true; + ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) { + all_in_set = set.find(n + suffix) != set.end(); + return !all_in_set; + }); + return all_in_set; } static std::shared_ptr NOP() { @@ -68,10 +77,11 @@ std::shared_ptr BackwardRecursive( // Then all input gradients cannot be computed at all, and we put them into // `no_grad_names` set. Return an NOP. if (AllInSet(forwardOp.outputs_, kGradVarSuffix, no_grad_names)) { - for (auto& name : forwardOp.inputs_) { - // Mark all input is not need - no_grad_names.insert(name + kGradVarSuffix); - } + ForEachVarName(forwardOp.inputs_, + [&no_grad_names](const std::string& name) -> bool { + no_grad_names.insert(GradVarName(name)); + return false; + }); return NOP(); } @@ -93,9 +103,11 @@ std::shared_ptr BackwardRecursive( auto fwd = *it; auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); net->AddOp(bwd); - for (auto& out : bwd->outputs_) { - dup_output_ops[out].emplace_back(local_op_id); - } + ForEachVarName(bwd->outputs_, + [&dup_output_ops, local_op_id](const std::string& out) { + dup_output_ops[out].emplace_back(local_op_id); + return false; + }); } // Get unique ID for this method. 
auto uid = uniq_id++; @@ -117,7 +129,7 @@ std::shared_ptr BackwardRecursive( insert_position.push_back( {dup_op.back(), OpRegistry::CreateOp( - "add", {dup_outputs}, {name}, + "add", {{"X", {dup_outputs}}}, {{"Out", {name}}}, {{"input_format", std::vector{0, static_cast(dup_outputs.size())}}})}); } @@ -131,24 +143,30 @@ std::shared_ptr BackwardRecursive( } else { std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); - for (std::string& grad_input : grad_op->inputs_) { + + ForEachVarName(grad_op->inputs_, [&no_grad_names, + &net](std::string& grad_input) { if (no_grad_names.count(grad_input)) { - std::string prefix = - grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); + // +1 for \0 + std::string prefix = grad_input.substr( + 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill // zero variables to that input gradient. - net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {prefix}, - {grad_input}, {})); + net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {{"Src", {prefix}}}, + {{"Dst", {grad_input}}}, {})); } - } + return false; + }); - for (std::string& grad_output : grad_op->outputs_) { - if (no_grad_names.count(grad_output)) { - grad_output = kEmptyVarName; - } - } + ForEachVarName(grad_op->outputs_, + [&no_grad_names](std::string& grad_output) { + if (no_grad_names.count(grad_output)) { + grad_output = kEmptyVarName; + } + return false; + }); if (net->ops_.empty()) { // Current no aux op is added to network return grad_op; @@ -167,7 +185,7 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); - no_grad_names.insert(kEmptyVarName + kGradVarSuffix); + no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); for (auto& name : no_grad_vars) { no_grad_names.insert(name + kGradVarSuffix); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 6d5835bd2236118b6aff95743c4319faceb05d89..ebe52d5f284a8d271b666483001544a805d598ac 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -30,6 +30,7 @@ using DeviceContext = platform::DeviceContext; class EmptyOp : public OperatorBase { public: + using OperatorBase::OperatorBase; void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; @@ -38,9 +39,9 @@ class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { public: RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input X of Add").IgnoreGradient(); - AddInput("b", "Bias of Add").IgnoreGradient(); - AddOutput("Out", "Out of Add").IgnoreGradient(); + AddInput("X", "Input X of Add").AsNoGradient(); + AddInput("b", "Bias of Add").AsNoGradient(); + AddOutput("Out", "Out of Add").AsNoGradient(); AddComment("Add Op"); } }; @@ -49,8 +50,8 @@ class MulOpMaker : public OpProtoAndCheckerMaker { public: MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("A", "A"); - AddInput("B", "B"); + AddInput("X", "A"); + AddInput("Y", "B"); AddOutput("Out", "Out"); AddComment("Mul"); } @@ -61,7 +62,7 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker { SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "X"); - AddOutput("Y", "Y"); + AddOutput("Out", "Y"); 
AddComment("Sigmoid"); } }; @@ -71,21 +72,25 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "X input"); - AddOutput("Y", "Y output"); + AddOutput("Out", "Y output"); AddComment("NoGradOp, same input output. no Grad"); } }; class FcOp : public operators::NetOp { public: - void Init() override { - AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, - {Output("mul_result")}, {})); - auto b_name = Input("b"); + FcOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(OpRegistry::CreateOp("mul", + {{"X", {Input("X")}}, {"Y", {Input("W")}}}, + {{"Out", {Output("mul_result")}}}, {})); + auto input_b = Inputs("b"); std::string before_act = "mul_result"; - if (b_name != kEmptyVarName) { - AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name}, - {Output("add_result")}, {})); + if (input_b.size() != 0) { + AddOp(OpRegistry::CreateOp( + "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, + {{"Out", {Output("add_result")}}}, {})); before_act = "add_result"; } else { auto out_varname = Output("add_result"); @@ -94,8 +99,8 @@ class FcOp : public operators::NetOp { } } - AddOp(OpRegistry::CreateOp("sigmoid", {Output(before_act)}, {Output("Out")}, - {})); + AddOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, + {{"Out", {Output("Out")}}}, {})); CompleteAddOp(false); } }; @@ -107,8 +112,8 @@ class FcOpMaker : public OpProtoAndCheckerMaker { AddInput("X", "x"); AddInput("W", "w"); AddInput("b", "b"); - AddOutput("mul_result", "").SetTemporary(); - AddOutput("add_result", "").SetTemporary(); + AddOutput("mul_result", "").AsIntermediate(); + AddOutput("add_result", "").AsIntermediate(); AddOutput("Out", ""); AddComment(""); } @@ -139,7 +144,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { public: AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x").SetMultiple(); + AddInput("X", "x").AsDuplicable(); AddOutput("Y", "y"); AddComment(""); } @@ -165,27 +170,24 @@ REGISTER_OP(many_output_op, f::EmptyOp, f::ManyOutputOpMaker); REGISTER_GRADIENT_OP(many_output_op, many_output_op_grad, f::EmptyOp); TEST(Backward, simple_op_grad) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); ASSERT_NE(fwd, nullptr); auto gop = f::OpRegistry::CreateGradOp(*fwd); - ASSERT_EQ(4UL, gop->inputs_.size()); - ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); + ASSERT_EQ(1UL, gop->inputs_.size()); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); - ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); - - ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); + ASSERT_EQ(f::GradVarName("x"), gop->Output(f::GradVarName("X"))); + ASSERT_EQ(f::GradVarName("b"), gop->Output(f::GradVarName("b"))); } TEST(Backward, simple_op_not_need_grad) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); ASSERT_NE(fwd, nullptr); - auto gop = f::Backward(*fwd, {"X"}); - ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - "X" + f::kGradVarSuffix), - 
gop->outputs_.end()); + auto gop = f::Backward(*fwd, {"x"}); + ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName); - auto no_input_gop = f::Backward(*fwd, {"X", "b"}); + auto no_input_gop = f::Backward(*fwd, {"x", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_EQ(0UL, @@ -193,8 +195,12 @@ TEST(Backward, simple_op_not_need_grad) { } TEST(Backward, net_fc_backward_normal) { - std::shared_ptr fwd = f::OpRegistry::CreateOp( - "fc", {"X", "w", "b"}, {"mul_result", "add_result", "out"}, {}); + std::shared_ptr fwd = + f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}}, + {{"mul_result", {"mul_res"}}, + {"add_result", {"add_re"}}, + {"Out", {"out"}}}, + {}); ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); @@ -216,8 +222,11 @@ TEST(Backward, net_fc_backward_normal) { TEST(Backward, net_fc_backward_not_have_b) { std::shared_ptr fwd = - f::OpRegistry::CreateOp("fc", {"X", "w", f::kEmptyVarName}, - {"mul_result", "add_result", "tmp"}, {}); + f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}}, + {{"mul_result", {"mul_res"}}, + {"add_result", {"add_res"}}, + {"Out", {"tmp"}}}, + {}); ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); @@ -236,38 +245,49 @@ TEST(Backward, net_fc_backward_not_have_b) { TEST(Backward, net_input_of_network_not_need_grad) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"}, - {"mul_tmp_0", "add_tmp_0", "hidden0"}, {})); - net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"}, - {"mul_tmp_1", "add_tmp_1", "hidden1"}, {})); + net.AddOp(f::OpRegistry::CreateOp( + "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, + {{"mul_result", {"mul_tmp_0"}}, + {"add_result", {"add_tmp_0"}}, + {"Out", {"hidden0"}}}, + {})); + net.AddOp(f::OpRegistry::CreateOp( + "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, + {{"mul_result", {"mul_tmp_1"}}, + {"add_result", {"add_tmp_1"}}, + {"Out", {"hidden1"}}}, + {})); net.CompleteAddOp(); - auto bwd = Backward(net, {"X"}); // X@GRAD is not need. + auto bwd = Backward(net, {"x"}); // x@GRAD is not need. 
ASSERT_TRUE(bwd->IsNetOp()); auto bwd_net = static_cast(bwd.get()); - std::unordered_set all_output = std::unordered_set( - bwd_net->outputs_.begin(), bwd_net->outputs_.end()); - all_output.erase(f::kEmptyVarName); + auto output_vars = bwd_net->OutputVars(true); + std::unordered_set all_outputs = + std::unordered_set(output_vars.begin(), output_vars.end()); + all_outputs.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); + ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end()); } // Not Generated X - ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); + ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); + first_fc_grad->ops_[2]->Output(f::GradVarName("X"))); } TEST(Backward, net_shared_weight) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {})); - net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {})); + net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, + {{"Out", {"out"}}}, {})); + net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, + {{"Out", {"FinalOut"}}}, {})); net.CompleteAddOp(); auto bwd = f::Backward(net, {}); @@ -278,31 +298,37 @@ TEST(Backward, net_shared_weight) { } TEST(Backward, op_register_grad_not_for_network) { - auto fwd = f::OpRegistry::CreateOp( - "fc", {"X", "W", "b"}, {"mul_out", "add_out", "out1"}, - {{"temporary_index", std::vector{0, 1}}}); + auto fwd = + f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}}, + {{"mul_result", {"mul_out"}}, + {"add_result", {"add_out"}}, + {"Out", {"out1"}}}, + {{"temporary_index", std::vector{0, 1}}}); ASSERT_THROW(f::OpRegistry::CreateGradOp(*fwd), EnforceNotMet); } TEST(Backward, op_all_input_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); - auto backward = f::Backward(*fwd, {"X", "b"}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); + auto backward = f::Backward(*fwd, {"x", "b"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } TEST(Backward, op_all_output_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); - auto backward = f::Backward(*fwd, {"Out"}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); + auto backward = f::Backward(*fwd, {"out"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } TEST(Backward, op_part_of_output_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {}); + auto fwd = f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}}, + {{"y", {"Y"}}, {"z", {"Z"}}}, {}); auto backward = f::Backward(*fwd, {"Z"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); @@ -310,60 +336,77 @@ TEST(Backward, op_part_of_output_are_not_need) { auto &fill_zero = *net->ops_[0]; ASSERT_EQ("fill_zeros_like", fill_zero.type_); - ASSERT_EQ(1UL, fill_zero.inputs_.size()); - ASSERT_EQ("Z", fill_zero.inputs_[0]); - 
ASSERT_EQ(1UL, fill_zero.outputs_.size()); - ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); + ASSERT_EQ(1UL, fill_zero.Inputs("Src").size()); + ASSERT_EQ("Z", fill_zero.Input("Src")); + ASSERT_EQ(1UL, fill_zero.Outputs("Dst").size()); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Dst")); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG - ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); - ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); - ASSERT_EQ("X" + f::kGradVarSuffix, - d_many_out.Output("x" + f::kGradVarSuffix)); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, + d_many_out.Input(f::GradVarName("z"))); + ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); + ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); } TEST(Backward, op_part_of_input_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("mul", {"a", "b"}, {"out"}, {}); + auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}}, + {{"Out", {"out"}}}, {}); auto backward = f::Backward(*fwd, {"a"}); auto &grad_mul = *backward; ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL); - ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); - ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), - "out" + f::kGradVarSuffix); - ASSERT_EQ(grad_mul.Input("A"), "a"); - ASSERT_EQ(grad_mul.Input("B"), "b"); + ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName); + ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b")); + ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); + ASSERT_EQ(grad_mul.Input("X"), "a"); + ASSERT_EQ(grad_mul.Input("Y"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); } TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"}, - {"mul_out1", "add_out1", "out1"}, {})); - net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"}, - {"mul_out2", "tmp_out2", "out2"}, {})); - net.AddOp(f::OpRegistry::CreateOp("fc", {"out2", "w3", "b3"}, - {"mul_out3", "tmp_out3", "out3"}, {})); + net.AddOp(f::OpRegistry::CreateOp( + "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, + {{"mul_result", {"mul_out1"}}, + {"add_result", {"add_out1"}}, + {"Out", {"out1"}}}, + {})); + net.AddOp(f::OpRegistry::CreateOp( + "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, + {{"mul_result", {"mul_out2"}}, + {"add_result", {"tmp_out2"}}, + {"Out", {"out2"}}}, + {})); + net.AddOp(f::OpRegistry::CreateOp( + "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, + {{"mul_result", {"mul_out3"}}, + {"add_result", {"tmp_out3"}}, + {"Out", {"out3"}}}, + {})); net.CompleteAddOp(); + auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); ASSERT_TRUE(backward->IsNetOp()); auto bwd_net = static_cast(backward.get()); ASSERT_EQ(bwd_net->ops_.size(), 3UL); auto &grad_fc = *bwd_net->ops_[0]; - EXPECT_EQ(grad_fc.inputs_.size(), - 3UL /* external input number */ + + const char *all = paddle::operators::NetOp::kAll; + EXPECT_EQ(grad_fc.inputs_[all].size(), + 2UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ + 2U /* internal variable 
number*/); - EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/ - + 2UL /* input number of rowwise_add */ - + 1UL /* input number of sigmod */); - EXPECT_EQ(bwd_net->ops_[1]->inputs_.size(), 0UL); - EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL); + EXPECT_EQ(grad_fc.outputs_[all].size(), + 2UL /* input number of mul*/ + + 2UL /* input number of rowwise_add + */ + + 1UL /* input number of sigmod */); + EXPECT_EQ(bwd_net->ops_[1]->inputs_[all].size(), 0UL); + EXPECT_EQ(bwd_net->ops_[1]->outputs_[all].size(), 0UL); + EXPECT_EQ(bwd_net->ops_[2]->inputs_[all].size(), 0UL); + EXPECT_EQ(bwd_net->ops_[2]->outputs_[all].size(), 0UL); } diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 545c1dcc2a1682839d90194002fdbb748d85e808..cfd3e8dfdec0e92620aef5cd246b4622b779ce19 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -283,6 +283,5 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { DDim::DDim(std::initializer_list init_list) { *this = make_ddim(init_list); } - } // namespace framework } // namespace paddle diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 5aa5af0c19be5a209c760282cb1a090fc57a53ad..95f294b62737be5c3eac39303148ac35da29fe7d 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -14,13 +14,12 @@ limitations under the License. */ #pragma once -#include #include #include #include #include "paddle/framework/dim.h" #include "paddle/platform/enforce.h" -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/platform/variant.h" namespace paddle { namespace framework { diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc deleted file mode 100644 index 9ad3979e5b511517f75d2d43004f97ee1576953b..0000000000000000000000000000000000000000 --- a/paddle/framework/details/lod_tensor.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/framework/lod_tensor.h" - -#include - -namespace paddle { -namespace framework { -namespace details { - -using LOD = LODTensor::LOD; - -std::shared_ptr SliceLOD(const LOD &lod, size_t level_begin, - size_t level_end) { - auto new_lod = std::make_shared(); - new_lod->reserve(level_end - level_begin); - for (size_t i = level_begin; i < level_end; i++) { - new_lod->emplace_back(lod[i]); - } - return new_lod; -} - -std::shared_ptr SliceLOD(const LOD &lod, size_t level, size_t elem_begin, - size_t elem_end, bool tensor_shared) { - // slice the lod. 
- auto new_lod = std::make_shared(); - new_lod->reserve(lod.size() - level); - auto start = lod.at(level)[elem_begin]; - auto end = lod.at(level)[elem_end]; - - for (auto it = lod.begin() + level; it != lod.end(); it++) { - auto it_begin = std::find(it->begin(), it->end(), start); - auto it_end = std::find(it_begin, it->end(), end); - PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info"); - PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info"); - new_lod->emplace_back(it_begin, it_end + 1); - if (!tensor_shared) { - // reset offset if tensor is copyed and sliced. - std::transform(new_lod->back().begin(), new_lod->back().end(), - new_lod->back().begin(), - [start](int v) { return v - start; }); - PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD"); - } - } - return new_lod; -} - -} // namespace details -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h deleted file mode 100644 index 9a6a6cd2ea41f02db991bdc0a2b917433dafed99..0000000000000000000000000000000000000000 --- a/paddle/framework/details/lod_tensor.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include - -namespace paddle { -namespace framework { -namespace details { - -/* - * Slice levels from LOD. - * - * @lod: LOD to slice. - * @level_begin: level to begin slice. - * @level_end: level to end slice. - */ -std::shared_ptr SliceLOD(const LODTensor::LOD &lod, - size_t level_begin, size_t level_end); - -/* - * Slice elements from a level of LOD. - * - * @lod: LOD to slice. - * @level: which level to slice. - * @elem_begin: element's index to begin slice. - * @elem_end: element's index to end slice. - */ -std::shared_ptr SliceLOD(const LODTensor::LOD &lod, - size_t level, size_t elem_begin, - size_t elem_end, bool tensor_shared); -} // namespace details -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto new file mode 100644 index 0000000000000000000000000000000000000000..7077e8aa2c77c24efdbb34ed3a13821fe7678455 --- /dev/null +++ b/paddle/framework/framework.proto @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +syntax = "proto2"; +package paddle.framework; + +enum AttrType { + INT = 0; + FLOAT = 1; + STRING = 2; + INTS = 3; + FLOATS = 4; + STRINGS = 5; +} + +// OpDesc describes an instance of a C++ framework::OperatorBase +// derived class type. +message OpDesc { + + message Attr { + required string name = 1; + required AttrType type = 2; + optional int32 i = 3; + optional float f = 4; + optional string s = 5; + repeated int32 ints = 6; + repeated float floats = 7; + repeated string strings = 8; + }; + + message Var { + required string parameter = 1; + repeated string arguments = 2; + }; + + required string type = 3; + repeated Var inputs = 1; + repeated Var outputs = 2; + repeated Attr attrs = 4; +}; + +// OpProto describes a C++ framework::OperatorBase derived class. +message OpProto { + + // VarProto describes the C++ type framework::Variable. + message Var { + required string name = 1; + required string comment = 2; + + optional bool duplicable = 3 [ default = false ]; + optional bool intermediate = 4 [ default = false ]; + optional bool no_gradient = 5 [ default = false ]; + } + + // AttrProto describes the C++ type Attribute. + message Attr { + required string name = 1; + required AttrType type = 2; + required string comment = 3; + // If that attribute is generated, it means the Paddle third + // language binding has responsibility to fill that + // attribute. End-User should not set that attribute. + optional bool generated = 4 [ default = false ]; + } + + required string type = 1; + repeated Var inputs = 2; + repeated Var outputs = 3; + repeated Attr attrs = 4; + required string comment = 5; +} diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 6d032fb78f099f5142d64e531d1a03c10ed5e68e..21bc30d1fbdae31548547bccf39e78fe16eedfaa 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -13,90 +13,52 @@ express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/grad_op_builder.h" -#include "paddle/framework/op_proto.pb.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace framework { - -class OpRegistry; - -using VarIndexMap = std::unordered_map; - enum class OpArgType { IN, OUT }; -static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; -} - -static const std::vector* GetOpFormat(const OperatorBase* op, - const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; -} - -static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, - const OpArgType& src_type, const OpArgType& dst_type, - int& idx, bool is_grad) { - const std::vector& src_inout = +static void TransOpArg(const OperatorBase* src_op, + OperatorBase::VarNameMap* vars, + const OpArgType& src_type, bool is_grad) { + const auto& src_inout = src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; - const std::vector* src_format = GetOpFormat(src_op, src_type); + auto& dst_inout = *vars; - std::vector& dst_inout = - dst_type == OpArgType::IN ? 
dst_op->inputs_ : dst_op->outputs_; - std::vector* dst_format = GetOpFormat(dst_op, dst_type); - const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const OpProto& proto = OpProtos().at(src_op->type_); const auto& src_arg_list = src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); - for (const auto& arg : src_arg_list) { - std::string src_name = arg.name(); - std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name; - (*dst_op->in_out_idxs_)[dst_name] = idx++; - int src_arg_idx = src_op->in_out_idxs_->at(src_name); - int src_begin = - src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); - int src_end = src_format == nullptr ? src_arg_idx + 1 - : src_format->at(src_arg_idx + 1); - for (int i = src_begin; i < src_end; ++i) { - std::string s = - is_grad ? src_inout[i] + kGradVarSuffix - : (arg.ignore_gradient() ? kEmptyVarName : src_inout[i]); - dst_inout.emplace_back(s); - } - if (dst_format != nullptr) { - dst_format->push_back(dst_inout.size()); + if (arg.no_gradient() && !is_grad) continue; + const std::string src_name = arg.name(); + std::string dst_name = is_grad ? GradVarName(src_name) : src_name; + dst_inout[dst_name].reserve(src_inout.at(src_name).size()); + for (auto& var_name : src_inout.at(src_name)) { + std::string s = is_grad ? GradVarName(var_name) : var_name; + dst_inout[dst_name].emplace_back(s); } } } OperatorBase* BuildGradOp(const OperatorBase* op) { - std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - grad_op->attrs_ = op->attrs_; - grad_op->attrs_.erase("input_format"); - grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OpArgType::IN) != nullptr) { - grad_op->attrs_["output_format"] = std::vector({0}); - } - if (GetOpFormat(op, OpArgType::IN) != nullptr || - GetOpFormat(op, OpArgType::OUT) != nullptr) { - grad_op->attrs_["input_format"] = std::vector({0}); - } - grad_op->in_out_idxs_.reset(new VarIndexMap()); - int in_idx = 0; - int out_idx = 0; - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG - return grad_op; + auto gop_type_it = OpRegistry::grad_ops().find(op->type_); + PADDLE_ENFORCE(gop_type_it != OpRegistry::grad_ops().end(), + "Operator %s do not register gradient type", op->type_); + auto& grad_op_type = gop_type_it->second; + OperatorBase::VarNameMap inputs; + OperatorBase::VarNameMap outputs; + TransOpArg(op, &inputs, OpArgType::IN, false); // I + TransOpArg(op, &inputs, OpArgType::OUT, false); // O + TransOpArg(op, &inputs, OpArgType::OUT, true); // OG + TransOpArg(op, &outputs, OpArgType::IN, true); // IG + auto gop_it = OpRegistry::op_creators().find(grad_op_type); + PADDLE_ENFORCE(gop_it != OpRegistry::op_creators().end(), + "Operator %s 's Gradient %s's creator cannot be found", + op->type_, grad_op_type); + + return gop_it->second(grad_op_type, inputs, outputs, op->attrs_); } } // namespace framework diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index cf7143eba4460e5619188b82ffe23db11a04a236..ebaf84545fce0d281d8821861264cddc8854893d 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -10,6 +10,7 @@ namespace 
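To make the four TransOpArg calls above concrete: the generated gradient op receives the forward inputs (I), the forward outputs (O), and the gradients of the forward outputs (OG) as its inputs, and emits the gradients of the forward inputs (IG) as its outputs. Gradient variable names are derived by suffixing, as the test expectations below show ("x" maps to "x@GRAD"). A tiny illustration of that naming; GradVarNameSketch is a hypothetical stand-in for the framework's GradVarName helper:

#include <iostream>
#include <string>

std::string GradVarNameSketch(const std::string& name) { return name + "@GRAD"; }

int main() {
  // Gradient of add_two(X, Y) -> Out:
  //   inputs : x, y, out, out@GRAD    outputs: x@GRAD, y@GRAD
  std::cout << GradVarNameSketch("out") << "\n";  // prints "out@GRAD"
  std::cout << GradVarNameSketch("x") << "\n";    // prints "x@GRAD"
  return 0;
}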
framework { class NOP : public OperatorBase { public: + using OperatorBase::OperatorBase; void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const platform::DeviceContext &dev_ctx) const override {} @@ -20,10 +21,10 @@ class MutiInOutOpMaker : public OpProtoAndCheckerMaker { MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("In1", "a single input"); - AddInput("In2_mult", "a multiple input").SetMultiple(); + AddInput("In2_mult", "a multiple input").AsDuplicable(); AddInput("In3", "another single input"); AddOutput("Out1", "a single output"); - AddOutput("Out2_mult", "a multiple output").SetMultiple(); + AddOutput("Out2_mult", "a multiple output").AsDuplicable(); AddComment("test op with multiple inputs and outputs"); } }; @@ -33,10 +34,10 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker { IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("In1", "a single input"); - AddInput("In2_mult", "a multiple input").SetMultiple().IgnoreGradient(); - AddInput("In3_mult", "another multiple input").SetMultiple(); - AddOutput("Out1_mult", "a multiple output").SetMultiple(); - AddOutput("Out2", "a single output").IgnoreGradient(); + AddInput("In2_mult", "a multiple input").AsDuplicable().AsNoGradient(); + AddInput("In3_mult", "another multiple input").AsDuplicable(); + AddOutput("Out1_mult", "a multiple output").AsDuplicable(); + AddOutput("Out2", "a single output").AsNoGradient(); AddComment("op with inputs and outputs ignored in gradient calculating"); } }; @@ -47,18 +48,18 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker { namespace f = paddle::framework; TEST(GradOpBuilder, AddTwo) { - std::shared_ptr add_op( - f::OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {})); + std::shared_ptr add_op(f::OpRegistry::CreateOp( + "add_two", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {})); std::shared_ptr grad_add_op = f::OpRegistry::CreateGradOp(*add_op); - EXPECT_EQ(static_cast(grad_add_op->inputs_.size()), 4); - EXPECT_EQ(static_cast(grad_add_op->outputs_.size()), 2); + EXPECT_EQ(grad_add_op->inputs_.size(), 4UL); + EXPECT_EQ(grad_add_op->outputs_.size(), 2UL); EXPECT_EQ(grad_add_op->Input("X"), "x"); EXPECT_EQ(grad_add_op->Input("Y"), "y"); EXPECT_EQ(grad_add_op->Input("Out"), "out"); - EXPECT_EQ(grad_add_op->Input("Out@GRAD"), "out@GRAD"); - EXPECT_EQ(grad_add_op->Output("X@GRAD"), "x@GRAD"); - EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD"); + EXPECT_EQ(grad_add_op->Input(f::GradVarName("Out")), f::GradVarName("out")); + EXPECT_EQ(grad_add_op->Output(f::GradVarName("X")), f::GradVarName("x")); + EXPECT_EQ(grad_add_op->Output(f::GradVarName("Y")), f::GradVarName("y")); } REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker); @@ -67,15 +68,15 @@ REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker); REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP); TEST(GradOpBuilder, MutiInOut) { - f::AttributeMap attrs{{"input_format", std::vector{0, 1, 4, 5}}, - {"output_format", std::vector{0, 1, 3}}}; std::shared_ptr test_op(f::OpRegistry::CreateOp( - "mult_io", {"in1", "in2_1", "in2_2", "in2_3", "in3"}, - {"out1", "out2_1", "out2_2"}, attrs)); + "mult_io", {{"In1", {"in1"}}, + {"In2_mult", {"in2_1", "in2_2", "in2_3"}}, + {"In3", {"in3"}}}, + {{"Out1", {"out1"}}, {"Out2_mult", {"out2_1", "out2_2"}}}, {})); std::shared_ptr grad_test_op = f::OpRegistry::CreateGradOp(*test_op); - ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL 
+ 3UL); + ASSERT_EQ(grad_test_op->inputs_.size(), 3UL + 2UL + 2UL); EXPECT_EQ(grad_test_op->Input("In1"), "in1"); EXPECT_EQ(grad_test_op->Inputs("In2_mult"), std::vector({"in2_1", "in2_2", "in2_3"})); @@ -83,55 +84,49 @@ TEST(GradOpBuilder, MutiInOut) { EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), std::vector({"out2_1", "out2_2"})); - EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), - "out1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")), + f::GradVarName("out1")); + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")), std::vector( - {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); + {f::GradVarName("out2_1"), f::GradVarName("out2_2")})); - ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), - std::vector({"in2_1" + f::kGradVarSuffix, - "in2_2" + f::kGradVarSuffix, - "in2_3" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix), - "in3" + f::kGradVarSuffix); + ASSERT_EQ(grad_test_op->outputs_.size(), 3UL); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), + std::vector({f::GradVarName("in2_1"), + f::GradVarName("in2_2"), + f::GradVarName("in2_3")})); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3")); } TEST(GradOpBuilder, IOIgnoredInGradient) { - f::AttributeMap attrs{{"input_format", std::vector{0, 1, 3, 5}}, - {"output_format", std::vector{0, 2, 3}}}; std::shared_ptr test_op(f::OpRegistry::CreateOp( - "io_ignored", {"in1", "in2_1", "in2_2", "in3_1", "in3_2"}, - {"out1_1", "out1_2", "out2"}, attrs)); + "io_ignored", {{"In1", {"in1"}}, + {"In2_mult", {"in2_1", "in2_2"}}, + {"In3_mult", {"in3_1", "in3_2"}}}, + {{"Out1_mult", {"out1_1", "out1_2"}}, {"Out2", {"out2"}}}, {})); std::shared_ptr grad_test_op = f::OpRegistry::CreateGradOp(*test_op); // 'In2' and 'Out2' are ignored in gradient calculating - ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + ASSERT_EQ(grad_test_op->inputs_.size(), 2UL + 1UL + 2UL); EXPECT_EQ(grad_test_op->Input("In1"), "in1"); - EXPECT_EQ(grad_test_op->Inputs("In2_mult"), - std::vector({f::kEmptyVarName, f::kEmptyVarName})); EXPECT_EQ(grad_test_op->Inputs("In3_mult"), std::vector({"in3_1", "in3_2"})); EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), std::vector({"out1_1", "out1_2"})); - EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); - EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")), std::vector( - {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), - "out2" + f::kGradVarSuffix); + {f::GradVarName("out1_1"), f::GradVarName("out1_2")})); + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")), + f::GradVarName("out2")); - ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + ASSERT_EQ(grad_test_op->outputs_.size(), 3UL); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), std::vector( - 
{"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), + {f::GradVarName("in2_1"), f::GradVarName("in2_2")})); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")), std::vector( - {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); + {f::GradVarName("in3_1"), f::GradVarName("in3_2")})); } diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 70045dbf7afd0935e4df852b2f0e3ecd163a9316..2b178907747b3911292b070b65160a24c120b726 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -19,32 +19,59 @@ namespace paddle { namespace framework { -LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); - // slice levels just need to update LOD info, each level will contains the - // whole tensor_, so no need to modify tensor_. - return LODTensor(tensor_, new_lod); +LODTensor::LOD LODTensor::LOD::SliceLevels(size_t level_begin, + size_t level_end) const { + LOD new_lod; + new_lod.reserve(level_end - level_begin); + for (size_t i = level_begin; i < level_end; i++) { + new_lod.emplace_back(at(i)); + } + return new_lod; } -LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin, - size_t elem_end) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, - NumLevels()); - PADDLE_ENFORCE(elem_begin < NumElements(level), - "element begin [%d] out of range [%d]", elem_begin, - NumElements(level)); - PADDLE_ENFORCE(elem_end < NumElements(level) + 1, - "element end [%d] out of range [%d]", elem_end, - NumElements(level)); - - auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, - true /*tensor_shared*/); - - // slice elements just need to update LOD info, because offsets are not - // changed, so the original tensor_ can be reused. - return LODTensor(tensor_, new_lod); +LODTensor::LOD LODTensor::LOD::SliceInLevel(size_t level, size_t elem_begin, + size_t elem_end) const { + // slice the lod. + LOD new_lod; + new_lod.reserve(size() - level); + auto start = this->at(level)[elem_begin]; + auto end = this->at(level)[elem_end]; + + for (auto it = this->begin() + level; it != this->end(); it++) { + auto it_begin = std::find(it->begin(), it->end(), start); + auto it_end = std::find(it_begin, it->end(), end); + PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info"); + PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info"); + new_lod.emplace_back(it_begin, it_end + 1); + // reset offset if tensor is copyed and sliced. 
+ std::transform(new_lod.back().begin(), new_lod.back().end(), + new_lod.back().begin(), + [start](int v) { return v - start; }); + PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LOD"); + } + PADDLE_ENFORCE_LE(new_lod.size(), this->size()); + return new_lod; +} + +bool operator==(const LODTensor::LOD& a, const LODTensor::LOD& b) { + if (a.size() != b.size()) { + return false; + } + + for (size_t i = 0; i < a.size(); i++) { + const auto& a_level = a[i]; + const auto& b_level = b[i]; + if (a_level.size() != b_level.size()) { + return false; + } + for (size_t j = 0; j < a_level.size(); j++) { + if (a_level[j] != b_level[j]) { + return false; + } + } + } + + return true; } } // namespace framework diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 4933479b109694312e99595dc8ad6db70259efa6..9e27aec38d336db8a4f0adbed098d299aa741356 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -15,7 +15,7 @@ #pragma once #include -#if (!PADDLE_ONLY_CPU) +#if !defined(PADDLE_ONLY_CPU) #include #include #endif @@ -31,30 +31,29 @@ namespace framework { * LODTensor (Level of details Tensor) * see https://en.wikipedia.org/wiki/Level_of_details for reference. */ -class LODTensor { +class LODTensor : public Tensor { public: // Level save offsets of each unit. #ifdef PADDLE_ONLY_CPU - using Level = std::vector; + template + using Vector = std::vector; #else - using Level = thrust::device_vector; + template + using Vector = thrust::host_vector; #endif - // LOD stores offsets of each level of units, the largest units level first, + // LoD stores offsets of each level of units, the largest units level first, // then the smaller units level. Each Level stores the offsets of units in // Tesor. - typedef std::vector LOD; + class LOD : public std::vector> { + public: + LOD SliceLevels(size_t level_begin, size_t level_end) const; + LOD SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) const; + }; LODTensor() {} - LODTensor(const std::shared_ptr &tensor, - const std::shared_ptr &lod) { - Reset(tensor, lod); - } + explicit LODTensor(const LOD &lod) : lod_(lod) {} - void Reset(const std::shared_ptr &tensor, - const std::shared_ptr &lod) { - tensor_ = tensor; - lod_start_pos_ = lod; - } + virtual Tensor *Clone() const { return new LODTensor(lod_); } /* * Get a element from LOD. @@ -65,16 +64,14 @@ class LODTensor { PADDLE_ENFORCE(elem < NumElements(level), "element begin [%d] out of range [%d]", elem, NumElements(level)); - return (*lod_start_pos_)[level][elem]; + return (lod_)[level][elem]; } /* * Number of LODTensor's levels, each level has units of data, for example, * in the sentence's view, article, paragraph, sentence are 3 levels. */ - size_t NumLevels() const { - return lod_start_pos_ ? lod_start_pos_->size() : 0UL; - } + size_t NumLevels() const { return lod_.size(); } /* * Number of elements in a level. */ @@ -82,64 +79,71 @@ class LODTensor { PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, NumLevels()); // the last offset is the end of last element - return lod_start_pos_->at(level).size() - 1; + return lod_[level].size() - 1; } - /* - * Slice of levels[level_begin:level_end], with tensor copied. - */ - template - LODTensor SliceCopied(size_t level_begin, size_t level_end, - const platform::Place &dst_place) const; - /* * Slice of levels[level_begin:level_end], with tensor shared. 
*/ - LODTensor SliceShared(size_t level_begin, size_t level_end) const; - - /* - * Slice of elements of a level, [elem_begin: elem_end], with tensor copied. - * @note: low performance in slice lod_start_pos_. - */ template - LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end, - const platform::Place &dst_place) const; + LODTensor SliceLevels(size_t level_begin, size_t level_end) const; /* * Slice of elements of a level, [elem_begin: elem_end], with tensor shared. - * @note: low performance in slice lod_start_pos_. - */ - LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const; - - /* - * Copy other's lod_start_pos_, to share LOD info. - * @note: the LOD info should not be changed. + * @note: low performance in slice lod_. */ - void ShareLOD(const LODTensor &other) { - lod_start_pos_ = other.lod_start_pos_; - } + template + LODTensor SliceInLevel(size_t level, size_t elem_begin, + size_t elem_end) const; /* - * Copy other's lod_start_pos_'s content, free to mutate. + * Copy other's lod_'s content, free to mutate. */ - void CopyLOD(const LODTensor &other) { - lod_start_pos_ = std::make_shared(*other.lod_start_pos_); - } + void CopyLOD(const LODTensor &other) { lod_ = other.lod_; } /* * Determine whether LODTensor has a valid LOD info. */ - bool HasLOD() const { return bool(lod_start_pos_); } - LOD *lod() const { return lod_start_pos_.get(); } + const LOD &lod() const { return lod_; } + LOD *mutable_lod() { return &lod_; } - std::shared_ptr &tensor() { return tensor_; } - Tensor *raw_tensor() { return tensor_.get(); } + virtual ~LODTensor() {} private: - std::shared_ptr lod_start_pos_; - std::shared_ptr tensor_; + LOD lod_; }; +bool operator==(const LODTensor::LOD &a, const LODTensor::LOD &b); + +template +LODTensor LODTensor::SliceLevels(size_t level_begin, size_t level_end) const { + auto new_lod = lod_.SliceLevels(level_begin, level_end); + // slice levels just need to update LOD info, each level will contains the + // whole tensor_, so no need to modify tensor_. + LODTensor new_tensor(new_lod); + new_tensor.ShareDataWith(*this); + return new_tensor; +} + +template +LODTensor LODTensor::SliceInLevel(size_t level, size_t elem_begin, + size_t elem_end) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = lod_.SliceInLevel(level, elem_begin, elem_end); + + // slice elements just need to update LOD info, because offsets are not + // changed, so the original tensor_ can be reused. + LODTensor new_tensor(new_lod); + new_tensor.ShareDataWith(*this); + return new_tensor; +} + } // namespace framework } // namespace paddle - -#include "paddle/framework/lod_tensor_impl.h" diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h deleted file mode 100644 index 0eb6469aea3ae25f035751da985b5bebb489d961..0000000000000000000000000000000000000000 --- a/paddle/framework/lod_tensor_impl.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include "paddle/framework/details/lod_tensor.h" - -namespace paddle { -namespace framework { - -template -LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end, - const platform::Place &dst_place) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); - auto new_tensor = std::make_shared(); - new_tensor->CopyFrom(*tensor_, dst_place); - - return LODTensor(new_tensor, new_lod); -} - -template -LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin, - size_t elem_end, - const platform::Place &dst_place) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, - NumLevels()); - PADDLE_ENFORCE(elem_begin < NumElements(level), - "element begin [%d] out of range [%d]", elem_begin, - NumElements(level)); - PADDLE_ENFORCE(elem_end < NumElements(level) + 1, - "element end [%d] out of range [%d]", elem_end, - NumElements(level)); - - auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, - false /*tensor_shared*/); - - auto start_idx = new_lod->front().front(); - auto end_idx = new_lod->front().back() - 1 /*the next element's start*/; - auto sliced_tensor = tensor_->Slice(start_idx, end_idx); - auto new_tensor = std::make_shared(); - new_tensor->CopyFrom(sliced_tensor, dst_place); - - return LODTensor(new_tensor, new_lod); -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 511716375e81e8fd89b071c940ee97327c268b8b..2881136ced6ef957a192e303e529b9b2867b3dda 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -15,6 +15,7 @@ #include #include +#include #include namespace paddle { @@ -29,22 +30,28 @@ class LODTensorTester : public ::testing::Test { // 0 10 20 // 0 5 10 15 20 // 0 2 5 7 10 12 15 20 - auto lod = std::make_shared(); - lod->push_back(std::vector{0, 10, 20}); - lod->push_back(std::vector{0, 5, 10, 15, 20}); - lod->push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); + LODTensor::LOD lod; + lod.push_back(std::vector{0, 10, 20}); + lod.push_back(std::vector{0, 5, 10, 15, 20}); + lod.push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); - auto tensor = std::make_shared(); - tensor->Resize({20 /*batch size*/, 128 /*dim*/}); + ASSERT_EQ(lod.size(), 3UL); + + tensor.Resize({20 /*batch size*/, 128 /*dim*/}); // malloc memory - tensor->mutable_data(place); + tensor.mutable_data(place); + + lod_tensor.reset(new LODTensor(lod)); + lod_tensor->Resize({20 /*batch size*/, 128 /*dim*/}); - lod_tensor->Reset(tensor, lod); + lod_tensor->ShareDataWith(tensor); + // lod_tensor->ShareDataWith(tensor); } protected: std::unique_ptr lod_tensor; platform::CPUPlace place; + Tensor tensor; }; TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); } @@ -55,110 +62,54 @@ TEST_F(LODTensorTester, NumElements) { ASSERT_EQ(lod_tensor->NumElements(2), 8UL); } -TEST_F(LODTensorTester, SliceShared_Level) { - 
// slice 1 level - for (size_t level = 0; level < 3UL; ++level) { - auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1); - ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); - ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); - ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); - } - // slice 2 level - for (size_t level = 0; level < 2UL; ++level) { - auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2); - ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); - ASSERT_EQ(new_lod_tensor.NumElements(1), - lod_tensor->NumElements(level + 1)); - ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); - } -} - -TEST_F(LODTensorTester, SliceCopied_Level) { +TEST_F(LODTensorTester, SliceLevels) { // slice 1 level for (size_t level = 0; level < 3UL; ++level) { - auto new_lod_tensor = - lod_tensor->SliceCopied(level, level + 1, place); + auto new_lod_tensor = lod_tensor->SliceLevels(level, level + 1); ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); - // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); - // TODO(superjom) add tensor comparation here. + // ASSERT_EQ(new_lod_tensor, *lod_tensor); } // slice 2 level for (size_t level = 0; level < 2UL; ++level) { - auto new_lod_tensor = - lod_tensor->SliceCopied(level, level + 2, place); + auto new_lod_tensor = lod_tensor->SliceLevels(level, level + 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); ASSERT_EQ(new_lod_tensor.NumElements(1), lod_tensor->NumElements(level + 1)); - // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); - // TODO(superjom) add tensor comparation here. 
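For readers following the new tests: the offset table built in SetUp above fixes the counts that the NumLevels and NumElements tests assert, because a level holding N+1 offsets describes N elements. A standalone check of that arithmetic with plain vectors (not framework code):

#include <cassert>
#include <vector>

int main() {
  std::vector<std::vector<size_t>> lod = {
      {0, 10, 20},
      {0, 5, 10, 15, 20},
      {0, 2, 5, 7, 10, 12, 15, 17, 20}};
  assert(lod.size() == 3);         // NumLevels()
  assert(lod[0].size() - 1 == 2);  // NumElements(0)
  assert(lod[1].size() - 1 == 4);  // NumElements(1)
  assert(lod[2].size() - 1 == 8);  // NumElements(2)
  return 0;
}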
+ ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); } } -TEST_F(LODTensorTester, SliceShared_Element) { - size_t level = 0; - auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); - ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); - - level = 1; - new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); - ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); -} - -TEST_F(LODTensorTester, SliceCopied_Element) { +TEST_F(LODTensorTester, SliceInLevel) { size_t level = 0; - auto new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); - ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + auto new_lod_tensor = lod_tensor->SliceInLevel(level, 0, 2); + EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); + EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); + EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL); + EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); level = 1; - new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + new_lod_tensor = lod_tensor->SliceInLevel(level, 0, 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); - - level = 1; - // LOD is - // 0 5 10 - // 0 2 5 7 10 - new_lod_tensor = lod_tensor->SliceCopied(level, 1, 3, place); - ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - - ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL); - ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL); - - // TODO(superjom) compare the content of these tensors + ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); } TEST_F(LODTensorTester, ShareLOD) { LODTensor new_lod_tensor; - new_lod_tensor.ShareLOD(*lod_tensor); + new_lod_tensor.CopyLOD(*lod_tensor); ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod()); } TEST_F(LODTensorTester, CopyLOD) { LODTensor new_lod_tensor; new_lod_tensor.CopyLOD(*lod_tensor); - ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod()); + bool equals = std::equal(lod_tensor->lod().begin(), lod_tensor->lod().end(), + new_lod_tensor.lod().begin()); + ASSERT_TRUE(equals); } } // namespace framework diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto deleted file mode 100644 index d95ba26f88ae181f991440e0df30c80f80a7eb2a..0000000000000000000000000000000000000000 --- a/paddle/framework/op_desc.proto +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -syntax = "proto2"; -package paddle.framework; - -import "attribute.proto"; - -// AttrDesc is used to describe Attributes of an Operator. It contain's -// name, type, and value of Attribute. -// -// e.g, for scale=3.0: name=scala, type=AttrType.FLOAT, value=3.0 -message AttrDesc { - required string name = 1; - required AttrType type = 2; - optional int32 i = 3; - optional float f = 4; - optional string s = 5; - repeated int32 ints = 6; - repeated float floats = 7; - repeated string strings = 8; -}; - -// Protocol Message to describe an Operator. -// -// In PaddlePaddle, Operator is used to do a certain computation such -// as "add", "sub", "cosine", etc. -// (1) Operator needs to know the input and output variable names. -// (2) Some ops may have special attributes such as "scale" in "CosineOp". -// -// 3rd-party language can build this proto message and call -// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator. -message OpDesc { - // input names of this Operator. - repeated string inputs = 1; - - // output names of this Operator. - repeated string outputs = 2; - - // type of this Operator, such as "add", "sub", "fc". - required string type = 3; - - // Attributes of this Operator. e.g., scale=3.0 in cosine op. - repeated AttrDesc attrs = 4; -}; \ No newline at end of file diff --git a/paddle/framework/op_desc_test.cc b/paddle/framework/op_desc_test.cc deleted file mode 100644 index d0c52523b64725ee11c281b086f9ffed6a09e787..0000000000000000000000000000000000000000 --- a/paddle/framework/op_desc_test.cc +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -TEST(OpDesc, Create) { - paddle::framework::OpDesc op_desc; - op_desc.set_type("add"); - op_desc.add_inputs("X"); - op_desc.add_inputs("Y"); - op_desc.add_outputs("Z"); - - auto attr = op_desc.mutable_attrs()->Add(); - attr->set_type(paddle::framework::AttrType::FLOAT); - attr->set_f(3.14); - - // required field name is not set, so IsInitialized should be false. - ASSERT_FALSE(op_desc.IsInitialized()); - - attr->set_name("add"); - // after all required fields are set, IsInitialized should be true now. - ASSERT_TRUE(op_desc.IsInitialized()); -} \ No newline at end of file diff --git a/paddle/framework/op_proto.proto b/paddle/framework/op_proto.proto deleted file mode 100644 index 52292162874b9ca207fb0d3917df41ade096b143..0000000000000000000000000000000000000000 --- a/paddle/framework/op_proto.proto +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -// Protocol Message for 3rd-party language binding. -// -// Paddle Python package will use `OpProto` to generate op creation methods. -// The op creation methods take user's input and generate `OpDesc` proto -// message, -// then pass `OpDesc` to C++ side and create Op pointer. -// -syntax = "proto2"; -package paddle.framework; - -import "attribute.proto"; - -// Attribute protocol message for 3rd-party language binding. -// It will store the Op support what attribute and what type. -message AttrProto { - // Supported attribute name. e.g. `scale` for cosine op. - required string name = 1; - - // Supported attribute type. - required AttrType type = 2; - - // Supported attribute comments. It helps 3rd-party language generate - // doc-string. - required string comment = 3; - - // If that attribute is generated, it means the Paddle third language - // binding has responsibility to fill that attribute. End-User should - // not set that attribute. - optional bool generated = 4 [ default = false ]; -} - -// Input or output message for 3rd-party language binding. -// It contains parameter name and its comments. -message VarProto { - // Input or output name in that op creation function. - // e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names. - required string name = 1; - - // The comment for that input. It helps 3rd-party language generate - // doc-string. - required string comment = 2; - - // Is that input/output could be a list or not. - // If so, that Op should write a attributed named `input_format` or - // `output_format`. - // - // e.g. - // If the op is a fc op, the inputs are `X`, `W`, `b`. The `X` and `W` - // could be multiple, so the multiple of `X` and `W` is True, and OpDesc - // will hold a attribute of them. - // - // The Op desc of same fc could be - // { - // "type": "fc", - // "input": ["X1", "X2", "W1", "W2", "b"], - // "output": "fc.out", - // "attrs" : { - // "input_format": [0, 2, 4, 5] - // } - // } - // - optional bool multiple = 3 [ default = false ]; - - // It marks that output is a temporary output. That output is not used by - // user, but used by other op internally as input. If other op is not use - // that output, it could be optimized early. - // - // Attribute temporary_index will be set in OpDesc if there is some - // outputs are temporary. - // - // output = [ "xxx.out1", "xxx.tmp", "xxx.out2"], - // attrs = { - // "temporary_index": [1] - // } - optional bool temporary = 4 [ default = false ]; - - // The gradient of operator can be ignored immediately - // e.g. operator AddOp, y = x1 + x2, the gradient of dy/dx1, dy/dx2 - // can be ignored for the future optimized on graph. - optional bool ignore_gradient = 6; -} - -// Op protocol message for 3rd-party language binding. -// It contains all information for generating op creation method. -message OpProto { - // The input information to generate op creation method. - repeated VarProto inputs = 1; - - // The output information to generate op creation method. 
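The fc example in the comment above is the crux of what this deletion removes: inputs used to travel as one flat name list segmented by an input_format offset attribute, which the named Var slots of the new framework.proto make unnecessary. A small sketch of how those offsets segmented the flat list; the code is illustrative only:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Legacy encoding: a flat list of variable names plus boundary offsets.
  std::vector<std::string> input = {"X1", "X2", "W1", "W2", "b"};
  std::vector<int> input_format = {0, 2, 4, 5};  // X = [0,2), W = [2,4), b = [4,5)
  for (size_t i = 0; i + 1 < input_format.size(); ++i) {
    std::cout << "slot " << i << ":";
    for (int j = input_format[i]; j < input_format[i + 1]; ++j) {
      std::cout << " " << input[j];
    }
    std::cout << "\n";
  }
  return 0;
}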
- repeated VarProto outputs = 2; - - // The attribute information to generate op creation method. - repeated AttrProto attrs = 3; - - // The comments for that Op. It helps 3rd-party language generate - // doc-string. The whole documentation of that Op is generated by comment, - // inputs, outputs, attrs together. - required string comment = 4; - - // The type of that Op. - required string type = 5; -} diff --git a/paddle/framework/op_proto_test.cc b/paddle/framework/op_proto_test.cc deleted file mode 100644 index 9c054bde44e77571330cbc59074705f0cfc1cfb6..0000000000000000000000000000000000000000 --- a/paddle/framework/op_proto_test.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include - -TEST(TestOpProto, ALL) { - paddle::framework::OpProto proto; - { - auto ipt = proto.mutable_inputs()->Add(); - *ipt->mutable_name() = "a"; - *ipt->mutable_comment() = "the one input of cosine op"; - } - { - auto ipt = proto.mutable_inputs()->Add(); - *ipt->mutable_name() = "b"; - *ipt->mutable_comment() = "the other input of cosine op"; - } - { - auto opt = proto.mutable_outputs()->Add(); - *opt->mutable_name() = "output"; - *opt->mutable_comment() = "the output of cosine op"; - } - { - auto attr = proto.mutable_attrs()->Add(); - *attr->mutable_name() = "scale"; - attr->set_type(paddle::framework::AttrType::FLOAT); - *attr->mutable_comment() = "the scale attribute of cosine op"; - } - proto.set_type("cos"); - *proto.mutable_comment() = "cosine op, output = scale * cos(a, b)"; - - ASSERT_TRUE(proto.IsInitialized()); -} \ No newline at end of file diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 6c26183818a9d6996e3d3ce2af74ba36f4711eca..3b793628aa6fdb08544ba90274736c9d29262a8b 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -20,8 +20,9 @@ limitations under the License. */ #include #include #include "paddle/framework/attribute.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/grad_op_builder.h" -#include "paddle/framework/op_desc.pb.h" +#include "paddle/framework/operator.h" #include "paddle/framework/scope.h" namespace paddle { @@ -44,111 +45,58 @@ class OpProtoAndCheckerMaker { protected: struct VariableBuilder { - VarProto* var_; - std::function on_multiple_; - std::function on_temporary_; + OpProto::Var* var_; - VariableBuilder& SetMultiple() { - var_->set_multiple(true); - on_multiple_(); + VariableBuilder& AsDuplicable() { + var_->set_duplicable(true); return *this; } - VariableBuilder& SetTemporary() { - PADDLE_ENFORCE(bool(on_temporary_), "Cannot set temporary"); - var_->set_temporary(true); - on_temporary_(); + VariableBuilder& AsIntermediate() { + var_->set_intermediate(true); return *this; } - VariableBuilder& IgnoreGradient() { - var_->set_ignore_gradient(true); + // TODO(FengJiayi, yuyang18): `AsNoGradient` is a very bad name, because it + // means that input/output is not needed when calculate gradient. It does + // not mean no gradient when backward. It should be changed soon. 
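To see the renamed builder methods in context, here is a sketch of an op maker written against the patched op_registry.h; ExampleOpMaker and its slot names are placeholders, and the real in-tree usages are the test makers updated elsewhere in this patch:

#include "paddle/framework/op_registry.h"

class ExampleOpMaker : public paddle::framework::OpProtoAndCheckerMaker {
 public:
  ExampleOpMaker(paddle::framework::OpProto* proto,
                 paddle::framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "a duplicable input").AsDuplicable();
    AddInput("W", "a single input");
    AddOutput("Tmp", "an internal output").AsIntermediate();
    AddOutput("Out", "the visible output");
    AddAttr<float>("scale", "scaling factor").SetDefault(1.0);
    AddComment("illustrative maker, not an operator added by this patch");
  }
};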
+ VariableBuilder& AsNoGradient() { + var_->set_no_gradient(true); return *this; } }; VariableBuilder AddInput(const std::string& name, const std::string& comment) { - auto input = proto_->mutable_inputs()->Add(); - *input->mutable_name() = name; - *input->mutable_comment() = comment; - return VariableBuilder{input, [=] { this->SetHasMultipleInput(); }, - nullptr}; + auto* input = proto_->add_inputs(); + input->set_name(name); + input->set_comment(comment); + return VariableBuilder{input}; } VariableBuilder AddOutput(const std::string& name, const std::string& comment) { - auto output = proto_->mutable_outputs()->Add(); - *output->mutable_name() = name; - *output->mutable_comment() = comment; - return VariableBuilder{output, [=] { this->SetHasMultipleOutput(); }, - [=] { this->SetHasTemporaryOutput(); }}; + auto* output = proto_->add_outputs(); + output->set_name(name); + output->set_comment(comment); + return VariableBuilder{output}; } template TypedAttrChecker& AddAttr(const std::string& name, const std::string& comment, bool generated = false) { - auto attr = proto_->mutable_attrs()->Add(); - *attr->mutable_name() = name; - *attr->mutable_comment() = comment; + auto* attr = proto_->add_attrs(); + attr->set_name(name); + attr->set_comment(comment); attr->set_generated(generated); attr->set_type(AttrTypeID()); return op_checker_->AddAttrChecker(name); } - void AddComment(const std::string& comment) { - *(proto_->mutable_comment()) = comment; - } + void AddComment(const std::string& comment) { proto_->set_comment(comment); } private: - void SetHasMultiple(const std::string& in_out, bool* flag) { - if (!*flag) { - AddAttr>(in_out + "_format", - "The multiple index of " + in_out + - "\n" - R"DOC( -This attribute is used by Paddle core framework. Paddle's Op support each input -or output could be a list of variable. This attribute is used to show how that -list organized. - -e.g. - input = ["a", "b", "c", "d", "e", "f"] - input_format = [0, 4, 5, 6] - -means - The number of all input variables this op is six, and they are segmented into - three inputs. - - The first input is input[0:4], second is input[4:5], third is input[5:6]. -)DOC", - /*generated*/ true); - *flag = true; - } - } - - void SetHasMultipleInput() { SetHasMultiple("input", &has_multiple_input_); } - void SetHasMultipleOutput() { - SetHasMultiple("output", &has_multiple_output_); - } - - void SetHasTemporaryOutput() { - if (!has_temporary_output_) { - AddAttr>("temporary_index", - R"DOC(The temporary index of output. - -Not all output of Paddle Op is used by user. For faster computation, each op -could output some its internal state to other op, other op could take that -output to make compute faster. - -Add a mark to which output is temporary is helpful for future optimization. -)DOC", - /*generated*/ true) - .SetDefault(std::vector()); - has_temporary_output_ = true; - } - } - void CheckNoDuplicatedInOutAttrs() { std::unordered_set names; auto checker = [&](const std::string& name) { @@ -169,89 +117,74 @@ Add a mark to which output is temporary is helpful for future optimization. 
OpProto* proto_; OpAttrChecker* op_checker_; bool validated_{false}; - bool has_multiple_input_{false}; - bool has_multiple_output_{false}; - bool has_temporary_output_{false}; }; class OpRegistry { - using OpCreator = std::function; - using VarIndexMap = std::unordered_map; - using VarNameList = std::vector; + using VarNameMap = OperatorBase::VarNameMap; + using OpCreator = std::function; public: template static void RegisterOp(const std::string& op_type) { - op_creators()[op_type] = [] { return new OpType; }; + op_creators()[op_type] = []( + const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) { + return new OpType(type, inputs, outputs, attrs); + }; OpAttrChecker& op_checker = op_checkers()[op_type]; - OpProto& op_proto = protos()[op_type]; + OpProto& op_proto = OpProtos()[op_type]; auto maker = ProtoMakerType(&op_proto, &op_checker); maker.Validate(); - *op_proto.mutable_type() = op_type; + op_proto.set_type(op_type); PADDLE_ENFORCE( op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", op_type, op_proto.InitializationErrorString()); - - VarIndexMaps()[op_type].reset(new VarIndexMap()); - auto& varmap = *VarIndexMaps()[op_type]; - int idx = 0; - for (auto& var : op_proto.inputs()) { - varmap[var.name()] = idx++; - } - idx = 0; - for (auto& var : op_proto.outputs()) { - varmap[var.name()] = idx++; - } } template static void RegisterGradOp(const std::string& op_type, const std::string& grad_op_type) { - op_creators()[grad_op_type] = [] { return new GradOpType; }; + op_creators()[grad_op_type] = []( + const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) { + return new GradOpType(type, inputs, outputs, attrs); + }; grad_ops()[op_type] = grad_op_type; } static std::shared_ptr CreateOp(const std::string& type, - const VarNameList& inputs, - const VarNameList& outputs, - const AttributeMap& attrs) { + const VarNameMap& inputs, + const VarNameMap& outputs, + AttributeMap attrs) { auto op_create_it = op_creators().find(type); PADDLE_ENFORCE(op_create_it != op_creators().end(), "Operator %s cannot be found.", type); + op_checkers().at(type).Check(attrs); - auto op = op_create_it->second(); - op->type_ = type; - op->inputs_ = inputs; - op->outputs_ = outputs; - - op->attrs_ = attrs; - op_checkers().at(type).Check(op->attrs_); + auto op = op_create_it->second(type, inputs, outputs, attrs); - GenerateTempVariableName(op); + return std::shared_ptr(op); + } - { - auto var_index_it = VarIndexMaps().find(type); - if (var_index_it != VarIndexMaps().end()) { - op->in_out_idxs_ = var_index_it->second; - } + static VarNameMap ConvertOpDescVarsToVarNameMap( + const google::protobuf::RepeatedPtrField& op_desc_vars) { + VarNameMap ret_val; + for (auto& var : op_desc_vars) { + auto& var_names = ret_val[var.parameter()]; + auto& var_names_in_proto = var.arguments(); + var_names.reserve(static_cast(var_names_in_proto.size())); + std::copy(var_names_in_proto.begin(), var_names_in_proto.end(), + std::back_inserter(var_names)); } - - op->Init(); - return std::shared_ptr(op); + return ret_val; } static std::shared_ptr CreateOp(const OpDesc& op_desc) { - std::vector inputs; - inputs.reserve((size_t)op_desc.inputs_size()); - std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), - std::back_inserter(inputs)); - - std::vector outputs; - outputs.reserve((size_t)op_desc.outputs_size()); - std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), - 
std::back_inserter(outputs)); - + VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); + VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); AttributeMap attrs; for (auto& attr : op_desc.attrs()) { attrs[attr.name()] = GetAttrValue(attr); @@ -264,26 +197,14 @@ class OpRegistry { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); std::shared_ptr grad_op(BuildGradOp(&op)); - grad_op->Init(); return grad_op; } - static std::unordered_map& protos() { - static std::unordered_map protos_; - return protos_; - } - static std::unordered_map& grad_ops() { static std::unordered_map grad_ops_; return grad_ops_; } - static std::unordered_map>& - VarIndexMaps() { - static std::unordered_map> maps_; - return maps_; - } - static std::unordered_map& op_creators() { static std::unordered_map op_creators_; return op_creators_; @@ -294,35 +215,47 @@ class OpRegistry { static std::unordered_map op_checkers_; return op_checkers_; } +}; - static void GenerateTempVariableName(OperatorBase* op) { - static std::atomic gUniqId(0UL); - for (auto& outname : op->outputs_) { - if (outname == kTempVarName) { - outname += op->type_; - outname += "@"; - outname += std::to_string(gUniqId.fetch_add(1)); - } - } - } +class Registrar { + public: + // In our design, various kinds of classes, e.g., operators and kernels, have + // their corresponding registry and registrar. The action of registration is + // in the constructor of a global registrar variable, which, however, are not + // used in the code that calls package framework, and would be removed from + // the generated binary file by the linker. To avoid such removal, we add + // Touch to all registrar classes and make USE_OP macros to call this + // method. So, as long as the callee code calls USE_OP, the global + // registrar variable won't be removed by the linker. + void Touch() {} }; template -class OpRegisterHelper { +class OpRegistrar : public Registrar { public: - explicit OpRegisterHelper(const char* op_type) { + explicit OpRegistrar(const char* op_type) { OpRegistry::RegisterOp(op_type); } }; template -class GradOpRegisterHelper { +class GradOpRegistrar : public Registrar { public: - GradOpRegisterHelper(const char* op_type, const char* grad_op_type) { + GradOpRegistrar(const char* op_type, const char* grad_op_type) { OpRegistry::RegisterGradOp(op_type, grad_op_type); } }; +template +class OpKernelRegistrar : public Registrar { + public: + explicit OpKernelRegistrar(const char* op_type) { + OperatorWithKernel::OpKernelKey key; + key.place_ = PlaceType(); + OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType); + } +}; + /** * check if MACRO is used in GLOBAL NAMESPACE. */ @@ -333,97 +266,121 @@ class GradOpRegisterHelper { msg) /** - * Macro to Register Operator. + * Macro to register Operator. 
*/ -#define REGISTER_OP(__op_type, __op_class, __op_maker_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \ - "REGISTER_OP must be in global namespace"); \ - static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \ - __op_register_##__op_type##__(#__op_type); \ - int __op_register_##__op_type##_handle__() { return 0; } +#define REGISTER_OP(op_type, op_class, op_maker_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ + static ::paddle::framework::OpRegistrar \ + __op_registrar_##op_type##__(#op_type); \ + int TouchOpRegistrar_##op_type() { \ + __op_registrar_##op_type##__.Touch(); \ + return 0; \ + } /** - * Macro to Register Gradient Operator. + * Macro to register Gradient Operator. */ -#define REGISTER_GRADIENT_OP(__op_type, __grad_op_type, __grad_op_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__grad_op_type, \ - "REGISTER_GRADIENT_OP must be in global namespace"); \ - static ::paddle::framework::GradOpRegisterHelper<__grad_op_class> \ - __op_gradient_register_##__op_type##__grad_op_type##__(#__op_type, \ - #__grad_op_type); \ - int __op_gradient_register_##__op_type##__grad_op_type##_handle__() { \ - return 0; \ +#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##grad_op_type, \ + "REGISTER_GRADIENT_OP must be called in global namespace"); \ + static ::paddle::framework::GradOpRegistrar \ + __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ + #grad_op_type); \ + int TouchOpGradientRegistrar_##op_type() { \ + __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \ + return 0; \ } /** - * Macro to Forbid user register Gradient Operator. + * Macro to register OperatorKernel. */ -#define NO_GRADIENT(__op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__op_type##_grad, \ - "NO_GRADIENT must be in global namespace") +#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "REGISTER_OP_KERNEL must be called in global namespace"); \ + static ::paddle::framework::OpKernelRegistrar \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ + int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \ + return 0; \ + } /** - * Macro to Register OperatorKernel. + * Macro to Forbid user register Gradient Operator. */ -#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ - "REGISTER_OP_KERNEL must be in global namespace"); \ - struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \ - __op_kernel_register__##type##__##DEVICE_TYPE##__() { \ - ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ - key.place_ = PlaceType(); \ - ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ - .reset(new __VA_ARGS__()); \ - } \ - }; \ - static __op_kernel_register__##type##__##DEVICE_TYPE##__ \ - __reg_kernel_##type##__##DEVICE_TYPE##__; \ - int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } - -// (type, KernelType) -#define REGISTER_OP_GPU_KERNEL(type, ...) \ - REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) - -// (type, KernelType) -#define REGISTER_OP_CPU_KERNEL(type, ...) 
\ - REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) +#define NO_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##op_type##_grad, \ + "NO_GRADIENT must be called in global namespace") + +#define REGISTER_OP_GPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) + +#define REGISTER_OP_CPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) /** * Macro to mark what Operator and Kernel we will use and tell the compiler to * link them into target. */ -#define USE_OP_WITHOUT_KERNEL(op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_without_kernel_##op_type, \ - "USE_OP_WITHOUT_KERNEL must be in global namespace"); \ - extern int __op_register_##op_type##_handle__(); \ - static int __use_op_ptr_##op_type##_without_kernel__ \ - __attribute__((unused)) = __op_register_##op_type##_handle__() - -#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ - "USE_OP_KERNEL must be in global namespace"); \ - extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \ - static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__ \ - __attribute__((unused)) = \ - __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() - -// use Operator with only cpu kernel. -#define USE_OP_CPU(op_type) \ - USE_OP_WITHOUT_KERNEL(op_type); \ - USE_OP_KERNEL(op_type, CPU) +#define USE_OP_ITSELF(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_itself_##op_type, \ + "USE_OP_ITSELF must be called in global namespace"); \ + extern int TouchOpRegistrar_##op_type(); \ + static int use_op_itself_##op_type##_ __attribute__((unused)) = \ + TouchOpRegistrar_##op_type() + +// TODO(fengjiayi): Most ops' gradient op have not been compeleted. So we use +// `NO_GRAD` to disable micro USE_OP_GRADIENT(op_type). Otherwise the code can't +// be compiled. `NO_GRAD` should be removed after all gradient ops are +// compeleted. +#define NO_GRAD +#ifndef NO_GRAD +#define USE_OP_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_gradient_##op_type, \ + "USE_OP_GRADIENT must be called in global namespace"); \ + extern int TouchOpGradientRegistrar_##op_type(); \ + static int use_op_gradient_##op_type##_ __attribute__((unused)) = \ + TouchOpGradientRegistrar_##op_type() +#else +#define USE_OP_GRADIENT(op_type) +#endif + +#define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "USE_OP_DEVICE_KERNEL must be in global namespace"); \ + extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \ + static int use_op_kernel_##op_type##_##DEVICE_TYPE##_ \ + __attribute__((unused)) = \ + TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() + +// TODO(fengjiayi): The following macros seems ugly, do we have better method? 
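A usage sketch of the renamed registration macros together with the USE_OP family defined just below; my_op, MyOp, MyOpMaker, MyOpGrad and MyOpKernel are placeholders, not operators introduced by this patch:

// In the operator's implementation file:
REGISTER_OP(my_op, MyOp, MyOpMaker);
REGISTER_GRADIENT_OP(my_op, my_op_grad, MyOpGrad);
REGISTER_OP_CPU_KERNEL(my_op, MyOpKernel<paddle::platform::CPUPlace, float>);

// In a binary that must keep the operator from being dropped by the linker, one of:
USE_OP(my_op);         // operator + kernels (gradient part is a no-op while NO_GRAD is defined)
USE_CPU_OP(my_op);     // CPU-only variant
USE_OP_ITSELF(my_op);  // registration only, no kernels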
#ifdef PADDLE_ONLY_CPU -#define USE_OP(op_type) USE_OP_CPU(op_type) +#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU) #else -#define USE_OP(op_type) \ - USE_OP_CPU(op_type); \ - USE_OP_KERNEL(op_type, GPU) +#define USE_OP_KERNEL(op_type) \ + USE_OP_DEVICE_KERNEL(op_type, CPU); \ + USE_OP_DEVICE_KERNEL(op_type, GPU) #endif +#define USE_NO_GRAD_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_KERNEL(op_type) + +#define USE_CPU_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_DEVICE_KERNEL(op_type, CPU); \ + USE_OP_GRADIENT(op_type) + +#define USE_OP(op_type) \ + USE_NO_GRAD_OP(op_type); \ + USE_OP_GRADIENT(op_type) + } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 9894928a7aa19bc6c7ad8b230562fb9a681cfebd..0b8f8289490135b8976c38fa3fb3c2995c50416f 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,6 +7,7 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: + using OperatorBase::OperatorBase; void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} void InferShape(const Scope& scope) const override {} @@ -27,6 +28,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: + using OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} @@ -36,8 +38,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("input", "input of cosine op").SetMultiple(); - AddOutput("output", "output of cosine op").SetTemporary(); + AddInput("input", "input of cosine op").AsDuplicable(); + AddOutput("output", "output of cosine op").AsIntermediate(); auto my_checker = [](int i) { PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!"); }; @@ -49,6 +51,15 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } // namespace framework } // namespace paddle +static void BuildVar(const std::string& param_name, + std::initializer_list arguments, + paddle::framework::OpDesc::Var* var) { + var->set_parameter(param_name); + for (auto& arg_name : arguments) { + var->add_arguments(arg_name); + } +} + REGISTER_OP(cos_sim, paddle::framework::CosineOp, paddle::framework::CosineOpProtoAndCheckerMaker); REGISTER_OP(my_test_op, paddle::framework::MyTestOp, @@ -57,8 +68,8 @@ REGISTER_OP(my_test_op, paddle::framework::MyTestOp, TEST(OpRegistry, CreateOp) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + BuildVar("output", {"bb"}, op_desc.add_outputs()); float scale = 3.3; auto attr = op_desc.mutable_attrs()->Add(); @@ -78,8 +89,8 @@ TEST(OpRegistry, CreateOp) { TEST(OpRegistry, IllegalAttr) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + BuildVar("output", {"bb"}, op_desc.add_outputs()); auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); @@ -103,8 +114,8 @@ TEST(OpRegistry, IllegalAttr) { TEST(OpRegistry, DefaultValue) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - 
op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + BuildVar("output", {"bb"}, op_desc.add_outputs()); ASSERT_TRUE(op_desc.IsInitialized()); @@ -116,20 +127,11 @@ TEST(OpRegistry, DefaultValue) { ASSERT_EQ(op->GetAttr("scale"), 1.0); } -static void SetInputFormat(paddle::framework::OpDesc* desc) { - auto attr = desc->add_attrs(); - attr->set_name("input_format"); - attr->set_type(paddle::framework::INTS); - attr->mutable_ints()->Add(0); - attr->mutable_ints()->Add(1); -} - TEST(OpRegistry, CustomChecker) { paddle::framework::OpDesc op_desc; op_desc.set_type("my_test_op"); - op_desc.add_inputs("ii"); - op_desc.add_outputs("oo"); - SetInputFormat(&op_desc); + BuildVar("input", {"ii"}, op_desc.add_inputs()); + BuildVar("output", {"oo"}, op_desc.add_outputs()); // attr 'test_attr' is not set bool caught = false; @@ -169,7 +171,6 @@ TEST(OpRegistry, CustomChecker) { attr->set_name("test_attr"); attr->set_type(paddle::framework::AttrType::INT); attr->set_i(4); - SetInputFormat(&op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::platform::CPUDeviceContext dev_ctx; paddle::framework::Scope scope; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index d9a013b883abdec4422806f90e36da7410a4fa0c..13442a72b9d77a4858b5d91dd7690e089ec7ed49 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/framework/operator.h" +#include +#include "paddle/framework/op_registry.h" namespace paddle { namespace framework { @@ -33,84 +33,139 @@ ExecutionContext::GetEigenDevice() const { } #endif -const std::string& OperatorBase::Input(const std::string& name) const { - PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, - "Input Output Indices could not be nullptr"); - auto it = in_out_idxs_->find(name); - PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", - name); - if (attrs_.count("input_format") == 0) { - return inputs_.at((size_t)it->second); - } else { - const auto& input_format = GetAttr>("input_format"); - int idx = input_format[it->second]; - return inputs_.at((size_t)idx); +static std::unordered_map* g_op_protos = nullptr; +std::unordered_map& OpProtos() { + if (g_op_protos == nullptr) { + g_op_protos = new std::unordered_map(); } + return *g_op_protos; } -std::vector OperatorBase::Inputs(const std::string& name) const { - PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr"); - auto input_format = GetAttr>("input_format"); - auto offset = in_out_idxs_->at(name); - PADDLE_ENFORCE(input_format.at(static_cast(offset) + 1) <= - static_cast(inputs_.size()), - "Input Out Of Range"); - - return std::vector{ - inputs_.begin() + input_format.at(offset), - inputs_.begin() + input_format.at(offset + 1)}; +const std::string& OperatorBase::Input(const std::string& name) const { + auto& ins = Inputs(name); + PADDLE_ENFORCE_EQ(ins.size(), 1UL, + "Op %s input %s should contain only one variable", type_, + name); + return ins[0]; } -const std::string& OperatorBase::Output(const std::string& name) const { - PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr"); - auto it = in_out_idxs_->find(name); - PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", +const std::vector& OperatorBase::Inputs( + const std::string& name) 
const { + auto it = inputs_.find(name); + PADDLE_ENFORCE(it != inputs_.end(), "Op %s do not have input %s", type_, name); - if (attrs_.count("output_format") == 0) { - return outputs_.at((size_t)it->second); - } else { - const auto& output_format = GetAttr>("output_format"); - int idx = output_format[it->second]; - return outputs_.at((size_t)idx); - } + return it->second; +} + +const std::string& OperatorBase::Output(const std::string& name) const { + auto& outs = Outputs(name); + PADDLE_ENFORCE_EQ(outs.size(), 1UL, + "Op %s output %s should contain only one variable", type_, + name); + return outs[0]; } -std::vector OperatorBase::Outputs(const std::string& name) const { - PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr"); - auto output_format = GetAttr>("output_format"); - auto offset = in_out_idxs_->at(name); - PADDLE_ENFORCE(output_format.at(static_cast(offset) + 1) <= - static_cast(outputs_.size()), - "Output Out of Range"); - return std::vector{ - outputs_.begin() + output_format.at(offset), - outputs_.begin() + output_format.at(offset + 1)}; +const std::vector& OperatorBase::Outputs( + const std::string& name) const { + auto it = outputs_.find(name); + PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_, + name); + return it->second; } std::string OperatorBase::DebugString() const { std::stringstream ss; - ss << "Op(" << type_ << "), inputs:("; - for (size_t i = 0; i < inputs_.size(); ++i) { - ss << inputs_[i]; - if (i != inputs_.size() - 1) { + ss << "Op(" << type_ << "), inputs:{"; + for (auto it = inputs_.begin(); it != inputs_.end();) { + auto& input = *it; + ss << input.first << "["; + for (size_t i = 0; i < input.second.size(); ++i) { + ss << input.second[i]; + if (i != input.second.size() - 1) { + ss << ", "; + } + } + ss << "]"; + ++it; + if (it != inputs_.end()) { ss << ", "; } } - ss << "), outputs:("; - for (size_t i = 0; i < outputs_.size(); ++i) { - ss << outputs_[i]; - if (i != outputs_.size() - 1) { + ss << "}, outputs:{"; + for (auto it = outputs_.begin(); it != outputs_.end();) { + auto& output = *it; + ss << output.first << "["; + for (size_t i = 0; i < output.second.size(); ++i) { + ss << output.second[i]; + if (i != output.second.size() - 1) { + ss << ", "; + } + } + ss << "]"; + ++it; + if (it != outputs_.end()) { ss << ", "; } } - ss << ")."; + ss << "}."; return ss.str(); } void OperatorBase::Rename(const std::string& old_name, const std::string& new_name) { - std::replace(inputs_.begin(), inputs_.end(), old_name, new_name); - std::replace(outputs_.begin(), outputs_.end(), old_name, new_name); + for (auto& input : inputs_) { + std::replace(input.second.begin(), input.second.end(), old_name, new_name); + } + for (auto& output : outputs_) { + std::replace(output.second.begin(), output.second.end(), old_name, + new_name); + } +} + +OperatorBase::OperatorBase(const std::string& type, + const OperatorBase::VarNameMap& inputs, + const OperatorBase::VarNameMap& outputs, + const AttributeMap& attrs) + : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) { + static std::atomic gUniqId(0UL); + for (auto& output : outputs_) { + for (auto& output_name : output.second) { + if (output_name == kTempVarName) { + output_name += type_; + output_name += "@"; + output_name += std::to_string(gUniqId.fetch_add(1)); + } + } + } +} + +std::vector OperatorBase::OutputVars(bool has_intermediate) const { + std::vector ret_val; + if (has_intermediate) { + // push all outputs into ret_val + for (auto& o : outputs_) { + 
ret_val.reserve(ret_val.size() + o.second.size()); + ret_val.insert(ret_val.end(), o.second.begin(), o.second.end()); + } + return ret_val; + } + auto it = OpProtos().find(type_); + PADDLE_ENFORCE( + it != OpProtos().end(), + "Operator %s not registered, cannot figure out intermediate outputs", + type_); + + // get all OpProto::Var for outputs + for (auto& o : it->second.outputs()) { + // ignore all intermediate output + if (o.intermediate()) continue; + auto out = outputs_.find(o.name()); + if (out != outputs_.end()) { + ret_val.reserve(ret_val.size() + out->second.size()); + ret_val.insert(ret_val.end(), out->second.begin(), out->second.end()); + } + } + return ret_val; } } // namespace framework diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index c324fa6702de1eabab3f75cbf4e6568c99b60470..4a72ced6ced92054eb170cd3012cafb181744953 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -15,42 +15,43 @@ limitations under the License. */ #pragma once #include -#include #include #include #include #include "paddle/framework/attribute.h" -#include "paddle/framework/op_desc.pb.h" -#include "paddle/framework/op_proto.pb.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" #include "paddle/platform/place.h" +#include "paddle/platform/variant.h" #include "paddle/utils/Error.h" namespace paddle { namespace framework { /// If a variable is a empty variable, that name will be used. -const std::string kEmptyVarName = "@EMPTY@"; +constexpr char kEmptyVarName[] = "@EMPTY@"; /// If a variable is a temporary variable, that name will be set in Python, /// but it will be convert to a unique name in scope after OpCreator. -const std::string kTempVarName = "@TEMP@"; +constexpr char kTempVarName[] = "@TEMP@"; /// If a variable's name has a certain suffix, it means that the /// variable is the gradient of another varibale. /// e.g. Variable "x@GRAD" is the gradient of varibale "x". -const std::string kGradVarSuffix = "@GRAD"; +constexpr char kGradVarSuffix[] = "@GRAD"; /// Variables with this suffix are supposed to be filled up with zeros. -const std::string kZeroVarSuffix = "@ZERO"; +constexpr char kZeroVarSuffix[] = "@ZERO"; inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; } +extern std::unordered_map& OpProtos(); + class OperatorBase; class InferShapeContext; class ExecutionContext; @@ -63,6 +64,15 @@ class ExecutionContext; */ class OperatorBase { public: + using VarNameMap = std::map>; + + OperatorBase(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs); + + OperatorBase(const OperatorBase& o) = delete; + OperatorBase& operator=(const OperatorBase& o) = delete; + OperatorBase(OperatorBase&& o) = delete; + virtual ~OperatorBase() {} template @@ -74,10 +84,6 @@ class OperatorBase { virtual std::string DebugString() const; - /// Init will be called after CreateOperator, you can put some initialization - /// logic here. - virtual void Init() {} - /// InferShape infer the size of Variables used by this Operator with /// information inside scope virtual void InferShape(const Scope& scope) const = 0; @@ -95,15 +101,19 @@ class OperatorBase { //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; - //! Get a input which has multiple variables. - //! TODO add a vector_view to prevent memory copy. 
- std::vector Inputs(const std::string& name) const; + const std::vector& Inputs(const std::string& name) const; + //! Get a output with argument's name described in `op_proto` const std::string& Output(const std::string& name) const; //! Get an output which has multiple variables. //! TODO add a vector_view to prevent memory copy. - std::vector Outputs(const std::string& name) const; + const std::vector& Outputs(const std::string& name) const; + + virtual std::vector OutputVars(bool has_intermediate) const; + + std::string Type() const { return type_; } + const AttributeMap& Attrs() const { return attrs_; } public: std::string type_; @@ -111,30 +121,25 @@ class OperatorBase { // I (Inputs) // O (Outputs) // OG (Output Gradients) - std::vector inputs_; + VarNameMap inputs_; + // NOTE: in case of OpGrad, outputs_ contains // IG (Inputs Gradients) - std::vector outputs_; + VarNameMap outputs_; AttributeMap attrs_; - // store the arguments' offset described in op_desc. - std::shared_ptr> in_out_idxs_; }; -class OperatorContext { +class InferShapeContext { public: - OperatorContext(const OperatorBase* op, const Scope& scope) - : op_(*op), scope_(scope) {} + InferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} - size_t InputSize() const { return op_.inputs_.size(); } - - size_t OutputSize() const { return op_.outputs_.size(); } - - const Variable* InputVar(const size_t index) const { - return scope_.FindVar(op_.inputs_.at(index)); + size_t InputSize(const std::string& name) const { + return op_.Inputs(name).size(); } - Variable* OutputVar(const size_t index) const { - return scope_.FindVar(op_.outputs_.at(index)); + size_t OutputSize(const std::string& name) const { + return op_.Outputs(name).size(); } const Variable* InputVar(const std::string& name) const { @@ -166,27 +171,9 @@ class OperatorContext { return res; } - template - const T* Input(const size_t index) const { - auto var = InputVar(index); - PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index); - return &var->Get(); - } - - template - T* Output(const size_t index) const { - auto var = OutputVar(index); - PADDLE_ENFORCE_NOT_NULL( - var, - "Output(%d) not be nullptr, which means variable [%s] does not " - "exist in scope", - index, op_.outputs_[index]); - return var->GetMutable(); - } - template const T* Input(const std::string& name) const { - auto var = InputVar(name); + auto* var = InputVar(name); PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name); return &var->Get(); } @@ -234,12 +221,6 @@ class OperatorContext { const Scope& scope_; }; -class InferShapeContext : public OperatorContext { - public: - InferShapeContext(const OperatorBase* op, const Scope& scope) - : OperatorContext(op, scope) {} -}; - template struct EigenDeviceConverter; @@ -255,11 +236,11 @@ struct EigenDeviceConverter { }; #endif -class ExecutionContext : public OperatorContext { +class ExecutionContext : public InferShapeContext { public: - ExecutionContext(const OperatorBase* op, const Scope& scope, + ExecutionContext(const OperatorBase& op, const Scope& scope, const platform::DeviceContext* device_context) - : OperatorContext(op, scope), device_context_(device_context) {} + : InferShapeContext(op, scope), device_context_(device_context) {} template GetPlace(); } + const platform::DeviceContext* device_context() const { + return device_context_; + } + const platform::DeviceContext* device_context_; }; @@ -310,14 +295,18 @@ class OperatorWithKernel : public OperatorBase { using 
OpKernelMap = std::unordered_map, OpKernelHash>; + OperatorWithKernel(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void InferShape(const Scope& scope) const override { - InferShape(InferShapeContext(this, scope)); + InferShape(InferShapeContext(*this, scope)); } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); + opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx)); } static std::unordered_map& diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 387aada749ba62246b44dedc050547c05955caa9..6804841587730d51d9cfad30a9de81401d36695b 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,20 +23,22 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: - void Init() override { x = 1; } + OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs), x(1) {} void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override { - op_run_num++; - ASSERT_EQ((int)inputs_.size(), 1); - ASSERT_EQ((int)outputs_.size(), 1); - ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr); + ++op_run_num; + ASSERT_EQ(static_cast(inputs_.size()), 1); + ASSERT_EQ(static_cast(outputs_.size()), 1); + ASSERT_EQ(scope.FindVar(inputs_.at("input")[0]), nullptr); ASSERT_EQ(x, 1); - ASSERT_NE(scope.FindVar(outputs_[0]), nullptr); + ASSERT_NE(scope.FindVar(outputs_.at("output")[0]), nullptr); } public: - float x = 0; + int x{0}; }; class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -54,14 +56,24 @@ class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } // namespace framework } // namespace paddle +static void BuildVar(const std::string& param_name, + std::initializer_list arguments, + paddle::framework::OpDesc::Var* var) { + var->set_parameter(param_name); + for (auto& arg_name : arguments) { + *var->mutable_arguments()->Add() = arg_name; + } +} + REGISTER_OP(test_operator, paddle::framework::OpWithoutKernelTest, paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker); TEST(OperatorBase, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("test_operator"); - *op_desc.mutable_inputs()->Add() = "IN1"; - *op_desc.mutable_outputs()->Add() = "OUT1"; + BuildVar("input", {"IN1"}, op_desc.add_inputs()); + BuildVar("output", {"OUT1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); @@ -97,6 +109,9 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { static int cpu_kernel_run_num = 0; class OpWithKernelTest : public OperatorWithKernel { + public: + using OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext& ctx) const override {} }; @@ -113,33 +128,15 @@ class CPUKernelTest : public OpKernel { } }; -// multiple inputs test -class OperatorMultiInputsTest : public OperatorBase { - public: - void Init() override { x = 1; } - void InferShape(const Scope& scope) const override {} - void Run(const Scope& scope, - const platform::DeviceContext& 
dev_ctx) const override { - ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr); - ASSERT_EQ(x, 1); - ASSERT_NE(scope.FindVar(outputs_[0]), nullptr); - ASSERT_EQ(Input("x"), "IN1"); - ASSERT_EQ(Input("y"), "OUT1"); - } - - public: - float x = 0; -}; - class OpKernelTestMultiInputsProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("xs", "inputs of test op").SetMultiple(); + AddInput("xs", "inputs of test op").AsDuplicable(); AddInput("k", "input of test op"); - AddOutput("ys", "outputs of test op").SetMultiple(); + AddOutput("ys", "outputs of test op").AsDuplicable(); AddAttr("scale", "scale of cosine op") .SetDefault(1.0) .LargerThan(0.0); @@ -196,8 +193,9 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel, TEST(OpKernel, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("op_with_kernel"); - *op_desc.mutable_inputs()->Add() = "IN1"; - *op_desc.mutable_outputs()->Add() = "OUT1"; + BuildVar("x", {"IN1"}, op_desc.add_inputs()); + BuildVar("y", {"OUT1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); @@ -223,32 +221,15 @@ TEST(OpKernel, multi_inputs) { OpDesc op_desc; op_desc.set_type("op_multi_inputs_with_kernel"); - *op_desc.mutable_inputs()->Add() = "x0"; - *op_desc.mutable_inputs()->Add() = "x1"; - *op_desc.mutable_inputs()->Add() = "x2"; - *op_desc.mutable_inputs()->Add() = "k0"; - *op_desc.mutable_outputs()->Add() = "y0"; - *op_desc.mutable_outputs()->Add() = "y1"; + BuildVar("xs", {"x0", "x1", "x2"}, op_desc.add_inputs()); + BuildVar("k", {"k0"}, op_desc.add_inputs()); + BuildVar("ys", {"y0", "y1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_f(3.14); - auto attr0 = op_desc.mutable_attrs()->Add(); - attr0->set_name("input_format"); - attr0->set_type(paddle::framework::AttrType::INTS); - auto input_format = attr0->mutable_ints(); - input_format->Add(0); // x0 - input_format->Add(3); // k - input_format->Add(4); // end - - auto attr1 = op_desc.mutable_attrs()->Add(); - attr1->set_name("output_format"); - attr1->set_type(paddle::framework::AttrType::INTS); - auto output_format = attr1->mutable_ints(); - output_format->Add(0); // y0 - output_format->Add(2); // y1 - paddle::platform::CPUDeviceContext cpu_device_context; paddle::framework::Scope scope; scope.NewVar("x0")->GetMutable(); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 915ffb1c007c24eca457a0c646378bc191a15810..07b42c83717652bdf0120b3004f39ac7f7a98d06 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -29,17 +30,18 @@ limitations under the License. 
*/ namespace py = pybind11; USE_OP(add_two); -USE_OP_CPU(onehot_cross_entropy); -USE_OP_WITHOUT_KERNEL(fc); -USE_OP(sgd); +USE_CPU_OP(onehot_cross_entropy); +USE_NO_GRAD_OP(sgd); USE_OP(mul); USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); +USE_OP(gaussian_random); USE_OP(uniform_random); + namespace paddle { namespace framework { @@ -54,30 +56,18 @@ void ExposeOperator(ClassType &m) { return op.type_; }) .def("outputs", - [](const typename ClassType::type &op) -> std::vector { - return op.outputs_; - }) + [](const typename ClassType::type &op) + -> std::map> { + return op.outputs_; + }) .def("inputs", - [](const typename ClassType::type &op) -> std::vector { - return op.inputs_; + [](const typename ClassType::type &op) { return op.inputs_; }) + .def("__str__", &ClassType::type::DebugString) + .def("no_intermediate_outputs", + [](const typename ClassType::type &op) { + return op.OutputVars(false); }) - .def("support_gpu", &ClassType::type::SupportGPU) - .def("temp_outputs", - [](const typename ClassType::type &op) -> std::vector { - auto iter = op.attrs_.find("temporary_index"); - std::vector ret; - if (iter == op.attrs_.end()) { - return ret; - } else { - auto tmp_idx = boost::get>(iter->second); - ret.reserve(tmp_idx.size()); - for (auto &index : tmp_idx) { - ret.push_back(op.outputs_.at(index)); - } - return ret; - } - }) - .def("__str__", &ClassType::type::DebugString); + .def("support_gpu", &ClassType::type::SupportGPU); } static size_t UniqueIntegerGenerator() { @@ -170,7 +160,7 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto &protos = OpRegistry::protos(); + auto &protos = OpProtos(); std::vector ret_values; for (auto it = protos.begin(); it != protos.end(); ++it) { PADDLE_ENFORCE(it->second.IsInitialized(), @@ -205,9 +195,13 @@ All parameter, weight, gradient are variables in Paddle. }); // clang-format on - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace") + .def(py::init()) + .def("__str__", string::to_string); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace") + .def(py::init<>()) + .def("__str__", string::to_string); py::class_> operator_base( m, "Operator"); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index b57958591fb752132407c35958db0781d0e023f0..b8c779f4e5fc7bc51298cdd35b26c2c8ac98edf6 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -79,11 +79,11 @@ class Tensor { inline const DDim& dims() const; /*! Resize the dimensions of the memory block. */ - inline void Resize(const DDim& dims); + inline Tensor& Resize(const DDim& dims); /*! The internal of two tensors share the same memory block. */ template - inline void ShareDataWith(const Tensor& src); + inline Tensor& ShareDataWith(const Tensor& src); /** * @brief Copy the content of external tensor to a new place. 
@@ -105,6 +105,8 @@ class Tensor { template inline Tensor Slice(const int& begin_idx, const int& end_idx) const; + platform::Place place() const { return holder_->place(); } + private: template inline void check_memory_size() const; diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 8d9bec6dc9c3f0af822a0d8cd8588dc932970652..7d7263b899afb7a2128548f264065a8013b6f0c9 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -23,9 +23,11 @@ template inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory."); + PADDLE_ENFORCE_GE( + holder_->size(), product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory.\n" + "or maybe the required data-type mismatches the data already stored."); } template @@ -78,9 +80,10 @@ inline T* Tensor::mutable_data(platform::Place place) { } template -inline void Tensor::ShareDataWith(const Tensor& src) { +inline Tensor& Tensor::ShareDataWith(const Tensor& src) { src.check_memory_size(); *this = src; + return *this; } template @@ -136,7 +139,10 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { return dst; } -inline void Tensor::Resize(const DDim& dims) { dims_ = dims; } +inline Tensor& Tensor::Resize(const DDim& dims) { + dims_ = dims; + return *this; +} inline const DDim& Tensor::dims() const { return dims_; } diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 93304f73037690b5cf3ac8189aabc28f51316a77..7dfb6f61c50959f7269725a00dbc4f9c27474bdf 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -38,10 +38,11 @@ if(WITH_GPU) add_simple_unittest(RowConvOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) + add_simple_unittest(DepthwiseConvOpTest) endif() -add_simple_unittest(ConvOpTest) add_simple_unittest(Im2ColTest) +add_simple_unittest(GemmConvOpTest) endif() add_style_check_target(paddle_function ${h_files}) diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp deleted file mode 100644 index 7f32c734791853a8cd0287a80a7955dbd1bd7571..0000000000000000000000000000000000000000 --- a/paddle/function/ConvOpTest.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "Function.h" -#include "FunctionTest.h" - -namespace paddle { - -enum TestType { - kForwardTest = 0, - kBackwardInputTest = 1, - kBackwardFilterTest = 2, -}; - -template -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64}) { - if (inputChannels > outputChannels) break; - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterSize, - filterSize}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterSize, - filterSize}); - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// Mainly used to test cases where the height and width (input, filter) -// are not equal. 
-template -class ConvolutionTest2 { -public: - ConvolutionTest2(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {16}) { - for (size_t inputHeight : {7, 31}) { - for (size_t inputWidth : {10, 54}) { - for (size_t filterHeight : {1, 5}) { - for (size_t filterWidth : {3, 7}) { - for (size_t inputChannels : {7}) { - for (size_t outputChannels : {7}) { - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - size_t stride = 1; - size_t padding = 0; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / - stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputHeight - << " inputWidth=" << inputWidth - << " outputChannels=" << outputChannels - << " filterHeight=" << filterHeight - << " filterWidth=" << filterWidth - << " outputHeight=" << outputHeight - << " outputWidth=" << outputWidth - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputHeight, inputWidth}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterHeight, - filterWidth}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterHeight, - filterWidth}); - TensorShape output{ - batchSize, outputChannels, outputHeight, outputWidth}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// ======Start Convolution TEST====== - -TEST(Forward, GEMM) { - ConvolutionTest test( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); - ConvolutionTest2 test2( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); -} - -#ifndef PADDLE_ONLY_CPU -TEST(Forward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); - ConvolutionTest2 test2( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); -} - -TEST(BackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); -} - -TEST(BackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - false); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - 
false); -} -#endif -// ======End Convolution TEST====== - -// ======Start DepthwiseConvolution TEST====== - -// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu -// version of depthwiseConv is implemented. - -#ifndef PADDLE_ONLY_CPU - -TEST(DepthwiseConvForward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); - ConvolutionTest2 test2( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); -} - -TEST(DepthwiseConvBackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); -} - -TEST(DepthwiseConvBackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); -} - -#endif -// ======End DepthwiseConvolution TEST====== - -} // namespace paddle diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h new file mode 100644 index 0000000000000000000000000000000000000000..cb02a96d0dbef6f64fd9e7576179572e68bf5513 --- /dev/null +++ b/paddle/function/ConvOpTest.h @@ -0,0 +1,256 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "FunctionTest.h" + +namespace paddle { + +template +void forward(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.run(); +} + +template +void backward_input(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); + test.run(); +} + +template +void backward_filter(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), ADD_TO); + test.run(); +} + +template +using Function = void (*)(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output); + +/** + * \brief A basic convolution function test interface. + * + * \param conv1 type name of convolution function 1. + * \param conv2 type name of convolution function 2. + * \param function test function, can be one of the forward, backward_input + * backward_filter function. + * Example: + * 1. 
Compare GemmConv's CPU and GPU implementation: + * Convolution( + * "GemmConv-CPU", "GemmConv-GPU", forward); + */ +template +void Convolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 5}) { + for (size_t inputSize : {7, 14, 31}) { + for (size_t filterSize : {1, 3, 5}) { + for (size_t inputChannels : {3, 16}) { + for (size_t outputChannels : {3, 16}) { + if (outputChannels < inputChannels) continue; + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + if (padding >= filterSize) break; + + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; + + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for + * image height is not equal image width. + */ +template +void Convolution2(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {4}) { + for (size_t inputHeight : {7, 31}) { + for (size_t inputWidth : {10, 54}) { + for (size_t filterHeight : {1, 5}) { + for (size_t filterWidth : {3, 7}) { + for (size_t inputChannels : {7}) { + for (size_t outputChannels : {7}) { + size_t stride = 1; + size_t padding = 0; + size_t outputHeight = + (inputHeight - filterHeight + 2 * padding + stride) / + stride; + size_t outputWidth = + (inputWidth - filterWidth + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputHeight + << " inputWidth=" << inputWidth + << " outputChannels=" << outputChannels + << " filterHeight=" << filterHeight + << " filterWidth=" << filterWidth + << " outputHeight=" << outputHeight + << " outputWidth=" << outputWidth + << " stride=" << stride << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputHeight, inputWidth}; + TensorShape filter{ + outputChannels, inputChannels, filterHeight, filterWidth}; + TensorShape output{ + batchSize, outputChannels, outputHeight, outputWidth}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for depthwise convolution. 
+ */ +template +void DepthwiseConvolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 32}) { + for (size_t inputSize : {7, 14, 54}) { + for (size_t filterSize : {3, 4}) { + for (size_t inputChannels : {32}) { + for (size_t outputChannels : {32, 64}) { + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + // NNPACK only supports stride = 1 if batchSize > 1, + // and there has some bug when batchSize > 1 and groups != 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1) + break; + + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + size_t groups = inputChannels; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", groups) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{groups, + outputChannels / groups, + inputChannels / groups, + filterSize, + filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/operators/mean_op_test.cc b/paddle/function/DepthwiseConvOpTest.cpp similarity index 51% rename from paddle/operators/mean_op_test.cc rename to paddle/function/DepthwiseConvOpTest.cpp index 375dcd50e130355c60f82b9d39d1b94fb2c911b0..f44ae0c342e9536366e2b537694cee81fcb1a6ed 100644 --- a/paddle/operators/mean_op_test.cc +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -13,13 +13,25 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include "ConvOpTest.h" -#include +namespace paddle { -USE_OP(mean); +#ifndef PADDLE_ONLY_CPU +TEST(DepthwiseConv, Forward) { + DepthwiseConvolution( + "GemmConv-CPU", "DepthwiseConv-GPU", forward); +} + +TEST(DepthwiseConv, BackwardInput) { + DepthwiseConvolution( + "GemmConvGradInput-CPU", "DepthwiseConvGradInput-GPU", backward_input); +} -TEST(MeanOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("mean"); - ASSERT_NE(it, protos.end()); +TEST(DepthwiseConv, BackwardFilter) { + DepthwiseConvolution( + "GemmConvGradFilter-CPU", "DepthwiseConvGradFilter-GPU", backward_filter); } +#endif + +} // namespace paddle diff --git a/paddle/function/GemmConvOpTest.cpp b/paddle/function/GemmConvOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5283d79a5a53d979ae4e134f7e46b7ee106e9c44 --- /dev/null +++ b/paddle/function/GemmConvOpTest.cpp @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ConvOpTest.h" + +namespace paddle { + +TEST(GemmConv, NaiveConv) { + Convolution( + "NaiveConv-CPU", "GemmConv-CPU", forward); + Convolution2( + "NaiveConv-CPU", "GemmConv-CPU", forward); +} + +#ifndef PADDLE_ONLY_CPU +TEST(GemmConv, Forward) { + Convolution( + "GemmConv-CPU", "GemmConv-GPU", forward); + Convolution2( + "GemmConv-CPU", "GemmConv-GPU", forward); +} + +TEST(GemmConv, BackwardInput) { + Convolution( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); + Convolution2( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); +} + +TEST(GemmConv, BackwardFilter) { + Convolution( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); + Convolution2( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); +} +#endif + +} // namespace paddle diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index 00d048eb216baf37c875c870a31cfd55a97f2974..6ccc487cf1c26b181b025cc62c93807c8a2848ef 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -196,30 +196,30 @@ public: CHECK_EQ(status, nnp_status_success); } } else { - for (size_t g = 0; g < groups_; g++) { - // only supports stride = 1 - CHECK_EQ(strideH(), 1); - CHECK_EQ(strideW(), 1); - nnp_status status = - nnp_convolution_output(algorithm_, - batchSize, - inputChannels / groups_, - outputChannels / groups_, - inputSize, - padding, - kernelSize, - inputData + inputOffset * g, - filterData + filterOffset * g, - nullptr, /* bias */ - outputData + outputOffset * g, - bufferPtr, - sizePtr, - nnp_activation_identity, - nullptr, - threadpool_, /* threadpool */ - nullptr); - CHECK_EQ(status, nnp_status_success); - } + // only supports stride = 1 + CHECK_EQ(strideH(), 1); + CHECK_EQ(strideW(), 1); + + // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1. + CHECK_EQ(groups_, static_cast(1)); + nnp_status status = nnp_convolution_output(algorithm_, + batchSize, + inputChannels, + outputChannels, + inputSize, + padding, + kernelSize, + inputData, + filterData, + nullptr, /* bias */ + outputData, + bufferPtr, + sizePtr, + nnp_activation_identity, + nullptr, + threadpool_, /* threadpool */ + nullptr); + CHECK_EQ(status, nnp_status_success); } } diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp index 48180112111c67f36ddd425008187201655089c9..4dd3982487f3567f461ddaea8c5dc719fff04736 100644 --- a/paddle/function/nnpack/NNPACKConvOpTest.cpp +++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp @@ -13,87 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include "paddle/function/Function.h" -#include "paddle/function/FunctionTest.h" - -DEFINE_string(algo, - "auto", - "The algorithm (auto, ft8x8, ft16x16, wt8x8, " - "implicit-gemm, or direct) for computing convolution of NNPACK."); +#include "paddle/function/ConvOpTest.h" namespace paddle { -#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \ - if (algo == "direct" && filterSize != 1) continue; \ - if (algo == "direct" && batchSize != 1) continue; \ - if (algo == "wt8x8" && filterSize != 3) continue; \ - if (algo == "implicit-gemm" && batchSize != 1) continue; \ - if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue; - -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64, 128}) { - if (inputChannels < outputChannels) break; - for (size_t stride : {1, 2}) { - // if batchSize > 1 NNPACKConv only supports stride = 1 - if (batchSize > 1 && stride > 1) break; - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - IS_NNPACK_SUPPORT(algo, filterSize, stride); - LOG(INFO) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("algo", algo)); - - TensorShape shape0{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape shape1{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape shape2{ - batchSize, outputChannels, outputSize, outputSize}; - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2)); - test.run(); - } - } - } - } - } - } - } - } -}; +TEST(NNPACK, Forward) { + Convolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); +} -TEST(Convolution, NNPACK) { - // NNPACK only supports stride = 1 - ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); +TEST(NNPACK, Depthwise) { + DepthwiseConvolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); } } // namespace paddle diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 0012636b8f618a1b45cfc801c04781e67694956f..62cff9361ccba3ae3b9359ddb932f5b26146eb97 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -23,6 +23,17 @@ endmacro() filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) + +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") +else() + message(STATUS "Compile with 
MKLDNNLayers and MKLDNNActivations") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 5de2170877ed6f6c70c5617918ad2c4e3b3ed2ee..78e958e06fac84fa956abc9faea60157bf6132eb 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -112,7 +112,6 @@ BEGIN_DEFINE_ACTIVATION(softmax) private: MatrixPtr sftMaxSum_; MatrixPtr sftMaxDot_; -MatrixPtr one_; public: Error __must_check forward(Argument& act) { @@ -138,14 +137,6 @@ Error __must_check backward(Argument& act) { 1, /* trans */ false, useGpu(act.deviceId)); - if (!one_ || one_->getWidth() != outputG->getWidth()) { - Matrix::resizeOrCreate(one_, - 1, - outputG->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - one_->one(); - } sftMaxDot_->dotMul(*outputG, *outputV); sftMaxSum_->colMerge(*sftMaxDot_); diff --git a/paddle/gserver/layers/MKLDNNBase.h b/paddle/gserver/layers/MKLDNNBase.h new file mode 100644 index 0000000000000000000000000000000000000000..4c0234e7b3a91053596c32cea581fa5d1e26b9d5 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNBase.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "mkldnn.hpp" + +namespace paddle { + +typedef enum { + MKLDNN_BASE = 1, // basical info of MKLDNN + MKLDNN_TESTS = 1, // gtest info of MKLDNN + MKLDNN_SIZES = 2, // size info of MKLDNN + MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_ALL = 4, // show all info of MKLDNN +} MKLDNN_LOG_LEVEL; + +/** + * @brief MKLDNN CPU engine. + * + */ +class CPUEngine { +public: + static CPUEngine& Instance() { + // Thread-safe in C++11. + static CPUEngine myInstance; + return myInstance; + } + + // Disallow copy or move + CPUEngine(const CPUEngine&) = delete; // Copy constructor + CPUEngine(CPUEngine&&) = delete; // Move constructor + CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment + CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment + + mkldnn::engine& getEngine() { return cpuEngine_; } + +protected: + CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CPUEngine() {} + +private: + mkldnn::engine cpuEngine_; +}; + +/** + * @brief MKLDNN Stream. 
+ * + */ +class MKLDNNStream { +public: + MKLDNNStream() : ready_(false) { resetState(); } + + virtual ~MKLDNNStream() {} + + /** + * @brief Submit stream + * @param prims The primitives vector + * @param block Waiting for the stream to complete + */ + void submit(std::vector& prims, bool block = true) { + resetState(); + stream_->submit(prims).wait(block); + ready_ = false; + } + + /** + * @brief Reset the mkldnn stream + */ + void resetState() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have method to reset this state + // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; + } + +private: + bool ready_; + std::shared_ptr stream_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..30f567eaf8248a8fba1b461a2bdbf2aab13f9e08 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -0,0 +1,282 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNFcLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; +typedef inner_product_forward fc_fwd; +typedef inner_product_backward_weights fc_bwdWgt; +typedef inner_product_backward_data fc_bwdData; + +namespace paddle { + +REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); + +bool MKLDNNFcLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; + + // output size, cat not be changed + oc_ = getSize(); + oh_ = 1; + ow_ = 1; + + // input size can not change in FC + iLayerSize_ = inputLayers_[0]->getSize(); + CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); + + // create weight + weight_ = + std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNFcLayer::convertWeightsFromPaddle() { + if (FLAGS_use_mkldnn_wgt) { + return; + } + + if (hasInitedWgt_) { + return; + } + + // The weight_ is transposed from initial paddle weight + MatrixPtr paddleWgt = Matrix::create( + weight_->getW()->getData(), iLayerSize_, oc_, false, false); + + // TODO(TJ): remove this print when do not need differ weights + std::ostringstream ostr; + paddleWgt->print(ostr); + VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); + + // The mkldnn weight is transposed from initial paddle matrix + MatrixPtr paddleWgtT; + paddleWgt->transpose(paddleWgtT, true); + weight_->getW()->copyFrom(*paddleWgtT); + 
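The CPUEngine singleton and MKLDNNStream introduced in MKLDNNBase.h above are the only shared pieces of MKL-DNN state in this patch: a layer obtains the one process-wide CPU engine and pushes its cached primitives through a stream. A minimal usage sketch, using only calls shown in this patch (the element type of the pipeline vector is assumed to be mkldnn::primitive, and the free function here is illustrative, not part of the patch):

#include <vector>
#include "mkldnn.hpp"
#include "paddle/gserver/layers/MKLDNNBase.h"

void runPipeline(std::vector<mkldnn::primitive>& pipeline) {
  // all MKLDNN layers share one CPU engine instance
  mkldnn::engine& eng = paddle::CPUEngine::Instance().getEngine();
  (void)eng;  // the engine is used when the primitives in `pipeline` are built
  // submit the cached primitives and block until they finish
  paddle::MKLDNNStream stream;
  stream.submit(pipeline, /*block=*/true);
}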
hasInitedWgt_ = true; +} + +void MKLDNNFcLayer::convertWeightsToPaddle() { + MatrixPtr dnnWgt = weight_->getW(); + MatrixPtr paddleWgt; + dnnWgt->transpose(paddleWgt, true); + + // copy paddle weight and override on weight_ + MatrixPtr dnnWgtT = Matrix::create( + dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); + dnnWgtT->copyFrom(*paddleWgt); +} + +void MKLDNNFcLayer::reshape() { + const Argument& input = getInput(0); + int batchSize = input.getBatchSize(); + if (bs_ == batchSize) { + return; + } + bs_ = batchSize; + ih_ = input.getFrameHeight(); + iw_ = input.getFrameWidth(); + if (ih_ == 0) { + ih_ = 1; + } + if (iw_ == 0) { + iw_ = 1; + } + hasSpatial_ = true; + if (ih_ == 1 && iw_ == 1) { + hasSpatial_ = false; + } + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); + ic_ = iLayerSize_ / (ih_ * iw_); + CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc_), getSize()); + printSizeInfo(); + + // reset output + output_.setFrameHeight(oh_); + output_.setFrameWidth(ow_); + resetOutput(bs_, oc_); + + // reset mkldnn forward + resetFwd(); + needResetBwd_ = true; + + convertWeightsFromPaddle(); +} + +void MKLDNNFcLayer::resetFwd() { + bool hasBias = biases_ && biases_->getW(); + real* iData = getInputValue(0)->getData(); + real* oData = getOutputValue()->getData(); + real* wData = weight_->getW()->getData(); + real* bData = hasBias ? biases_->getW()->getData() : NULL; + + // TODO(TJ): below create should be covered in MkldnnMatrix + // create memory desc + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + + // create memory primitive desc and memory self + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); + outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + + prop_kind pk = prop_kind::forward; + fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) + : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + if (bData != NULL) { + biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + } else { + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MKLDNNFcLayer::resetBwd() { + if (!needResetBwd_) { + return; + } + needResetBwd_ = false; + + bool hasBias = biases_ && biases_->getWGrad(); + real* iData = getInputValue(0)->getData(); + real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* oDiff = getOutputGrad()->getData(); + real* wDiff = weight_->getWGrad()->getData(); + real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; + + /// backward weight + // create memory desc for backward memory + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? 
createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + + if (inVal_) { + // update data + inVal_->set_data_handle(iData); + } else { + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + } + + // create memory primitive desc and memory self + wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); + outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL + ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) + : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + if (bDiff != NULL) { + biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + /// backward data + if (iDiff == NULL) { + return; + } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + pipelineBwd_.push_back(*bwdData_); +} + +void MKLDNNFcLayer::forward(PassType passType) { + Layer::forward(passType); + reshape(); + + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + + // update input data + // since it might be changed if this is after data layer + real* iData = getInputValue(0)->getData(); + inVal_->set_data_handle(iData); + + // just submit forward pipeline + stream_->submit(pipelineFwd_); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } +} + +void MKLDNNFcLayer::backward(const UpdateCallback& callback) { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + resetBwd(); + + // update diff + real* oDiff = getOutputGrad()->getData(); + outGrad_->set_data_handle(oDiff); + + // just sumbmit backward pipeline + stream_->submit(pipelineBwd_); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } + } +} +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..7954852a23f81d36d5fb0ae6a19768f419886fb1 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
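convertWeightsFromPaddle() and convertWeightsToPaddle() above differ only in direction: paddle stores the fully connected weight as an iLayerSize x oc matrix, while the MKL-DNN primitives expect the transposed oc x iLayerSize layout. A standalone illustration of that relation (plain arrays stand in for the Matrix objects; the sizes and values are made up):

#include <cstdio>

int main() {
  const int iLayerSize = 2, oc = 3;
  float paddleWgt[iLayerSize][oc] = {{1, 2, 3},   // paddle layout: iLayerSize x oc
                                     {4, 5, 6}};
  float dnnWgt[oc][iLayerSize];                   // mkldnn layout: oc x iLayerSize
  for (int o = 0; o < oc; ++o)
    for (int i = 0; i < iLayerSize; ++i) dnnWgt[o][i] = paddleWgt[i][o];
  printf("%.0f %.0f\n", dnnWgt[2][0], dnnWgt[2][1]);  // prints "3 6"
}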
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MKLDNNLayer fc layer. + * + * The config file api is mkldnn_fc + */ +class MKLDNNFcLayer : public MKLDNNLayer { +protected: + // input layer size, can not be change after init + size_t iLayerSize_; // == ic * ih * iw + + // if has already init the weight + bool hasInitedWgt_; + + // if input layer has image size info (ih>1 && iw>1) + bool hasSpatial_; + + // fc weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNFcLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + + ~MKLDNNFcLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; + +protected: + /** + * reshape the input image sizes + * and reset output buffer size + * and reset mkldnn forward + */ + void reshape(); + + /** + * reset the forward primitve and memory + * only would be called when input size changes + */ + void resetFwd(); + + /** + * reset the backward primitve and memory for mkldnn fc + * only would be called when needed + */ + void resetBwd(); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..63e29f447eede5ff9df8715bc9140b64ab7f7d17 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "MKLDNNBase.h" +#include "mkldnn.hpp" + +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); + +namespace paddle { + +class MKLDNNLayer; +typedef std::shared_ptr MKLDNNLayerPtr; + +/** + * @brief Base class of MKLDNNlayer. 
+ * + */ +class MKLDNNLayer : public Layer { +protected: + // batch size + int bs_; + // input image channel, height and width + int ic_, ih_, iw_; + // output image channel, height and width + int oc_, oh_, ow_; + + // backward also need reset after reset forward handle + bool needResetBwd_; + + // mkldnn engine, stream and primivtives + mkldnn::engine engine_; + std::shared_ptr stream_; + std::shared_ptr fwd_; + std::shared_ptr bwdWgt_; + std::shared_ptr bwdData_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + + // TODO(TJ): change below memory as MKLDNNMatrixPtr type + std::shared_ptr inVal_; + std::shared_ptr inGrad_; + std::shared_ptr outVal_; + std::shared_ptr outGrad_; + std::shared_ptr wgtVal_; + std::shared_ptr wgtGrad_; + std::shared_ptr biasVal_; + std::shared_ptr biasGrad_; + +public: + explicit MKLDNNLayer(const LayerConfig& config) + : Layer(config), + bs_(0), + ic_(0), + ih_(0), + iw_(0), + oc_(0), + oh_(0), + ow_(0), + needResetBwd_(true), + engine_(mkldnn::engine::cpu, 0), + stream_(nullptr), + fwd_(nullptr), + bwdWgt_(nullptr), + bwdData_(nullptr) {} + + ~MKLDNNLayer() {} + + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + stream_.reset(new MKLDNNStream()); + engine_ = CPUEngine::Instance().getEngine(); + + // TODO(TJ): deivecId + return true; + } + + /** + * convert weight from paddle format to mkldnn format + * weight_ will be override + */ + virtual void convertWeightsFromPaddle() {} + + /** + * convert mkldnn weight to paddle format + * weight_ will be override + */ + virtual void convertWeightsToPaddle() {} + + /** + * print info about sizes + */ + virtual void printSizeInfo() { + VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; + } + + // TODO(TJ): move to MkldnnMatrix + // create memory desc + inline mkldnn::memory::desc createMD( + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { + // TODO(TJ): isFmtSuppoted(fmt) + return mkldnn::memory::desc(dims, type, fmt); + } +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 209d0ab9c8d7e8463c8636b1412622a94f359fb1..c2a2993620492a9ec5dae932ff1292ced2c00064 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider # mkdir will get error. 
add_test(NAME test_ProtoDataProvider COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) ################# test_LayerGrad ####################### add_unittest_without_exec(test_LayerGrad @@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +########## test_Mkldnn layers and activations ########## +if(WITH_MKLDNN) + add_unittest_without_exec(test_MKLDNN + test_MKLDNN.cpp + MKLDNNTester.cpp + LayerGradUtil.cpp) + add_test(NAME test_MKLDNN COMMAND test_MKLDNN) +endif() + ################ test_CRFLayerGrad #################### add_unittest_without_exec(test_CRFLayerGrad test_CRFLayerGrad.cpp @@ -92,8 +101,8 @@ if(WITH_PYTHON) test_PyDataProvider.cpp) add_test(NAME test_PyDataProvider - COMMAND .set_python_path.sh -d ./gserver/tests:${PROJ_ROOT}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ./gserver/tests:${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentLayer ####################### @@ -106,7 +115,7 @@ if(NOT WITH_DOUBLE) add_test(NAME test_WarpCTCLayer COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentGradientMachine ############### @@ -116,20 +125,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine test_RecurrentGradientMachine.cpp) add_test(NAME test_RecurrentGradientMachine COMMAND .set_python_path.sh -d - ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) add_unittest_without_exec(test_NetworkCompare test_NetworkCompare.cpp) if(WITH_GPU) add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) else() add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() @@ -137,6 +146,6 @@ add_unittest_without_exec(test_PyDataProvider2 test_PyDataProvider2.cpp) add_test(NAME test_PyDataProvider2 - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/paddle/gserver/tests:${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle ) diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index fd9cfa1dc7a9028cb2c5c98baca98ffb2a837bac..a38880e14cdfcef05461dae567d198e5400c6bb1 100644 
--- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -388,14 +388,23 @@ void initDataLayer(TestConfig testConf, data.grad->zeroMem(); break; case INPUT_SELF_DEFINE_DATA: { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); + if (testConf.inputDefs[i].ids.size()) { + data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); + data.ids->copyFrom(testConf.inputDefs[i].ids.data(), + testConf.inputDefs[i].ids.size()); + } else if (testConf.inputDefs[i].selfDefinedData) { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + } else { + LOG(FATAL) << "No self-defined data are given."; + return; + } const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5debedf5ef6a3262578ca01b335e664f9a334d35..88e831f78bd165f63806df6c081d84411be51502 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -68,6 +68,7 @@ struct InputDef { std::vector labelInitValue; std::vector labelSeqStartPositions; std::vector labelSubSeqStartPositions; + std::vector ids; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -95,6 +96,23 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + const std::vector& ids, + const std::vector& selfDefinedSeqStartPos = {}, + const std::vector& selfDefinedSubSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), + ids(ids) { + selfDefinedData = nullptr; + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp new file mode 100644 index 0000000000000000000000000000000000000000..99c8c4948c9b05ad15d1217ebb70026bbd48453f --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -0,0 +1,369 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
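The new InputDef constructor above lets a gradient test feed raw integer ids (plus optional sequence start positions) instead of a dense matrix; initDataLayer then builds an IVector from them under INPUT_SELF_DEFINE_DATA. A hypothetical sketch of how a test might use it (the element type of ids and the concrete values are assumptions, not taken from this patch):

// hypothetical test-config fragment, not part of this patch
TestConfig config;
std::vector<int> ids = {2, 0, 1, 3};      // assumed element type
std::vector<int> seqStart = {0, 2, 4};    // two sequences of length 2
config.inputDefs.push_back(
    {INPUT_SELF_DEFINE_DATA, "input_ids", ids, seqStart});
config.layerConfig.add_inputs();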
*/ + +#include "MKLDNNTester.h" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" + +namespace paddle { + +// init data layer and test layer of both dnn and reference +void MKLDNNTester::reset(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize) { + const bool trans = false; + const bool useGpu = false; + + // clear + configs_.clear(); + layerNames_.clear(); + dataLayers_.clear(); + datas_.clear(); + layerMaps_.clear(); + parameters_.clear(); + testLayers_.clear(); + + // resize + configs_.resize(NUM); + layerNames_.resize(NUM); + dataLayers_.resize(NUM); + datas_.resize(NUM); + layerMaps_.resize(NUM); + parameters_.resize(NUM); + testLayers_.resize(NUM); + + // reset configs and layer names + configs_[DNN] = dnn; + configs_[REF] = ref; + layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer + layerNames_[REF] = "reference"; // second is reference layer + + // reset others + for (size_t i = 0; i < NUM; ++i) { + configs_[i].layerConfig.set_name(layerNames_[i]); + initDataLayer(configs_[i], + &(dataLayers_[i]), + &(datas_[i]), + &(layerMaps_[i]), + layerNames_[i], + batchSize, + trans, + useGpu); + initTestLayer( + configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); + } + dnnLayer_ = testLayers_[DNN]; + refLayer_ = testLayers_[REF]; + EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + + setInputImgSize(); +} + +void MKLDNNTester::setInputImgSize() { + for (size_t n = 0; n < dataLayers_.size(); ++n) { + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + // TODO(TJ): fix me when concat and elewise ready + dataLayers_[n][i]->getOutput().setFrameHeight(ih_); + dataLayers_[n][i]->getOutput().setFrameWidth(iw_); + } + } +} + +// init randome parameters of ref, and copy to mkldnn +void MKLDNNTester::randomWgtDatas() { + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + for (size_t i = 0; i < parameters_[REF].size(); ++i) { + const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + parameters_[REF][i]->randomize(); + dnnValue->copyFrom(*refValue); + + VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName(); + printVector(dnnValue); + } +} + +// random botdata of ref layer and copy same to mkldnn +void MKLDNNTester::randomBotDatas() { + CHECK_EQ(dataLayers_.size(), NUM); + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); + dataLayers_[DNN][i]->getOutputValue()->copyFrom( + *(dataLayers_[REF][i]->getOutputValue())); + VLOG(lvl_) << "Input " << i << " data:"; + printMatrix(dataLayers_[REF][i]->getOutputValue()); + } +} + +void MKLDNNTester::randomTopDiffs() { + refLayer_->getOutputGrad()->randomizeUniform(); + dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); + VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; + printMatrix(refLayer_->getOutputGrad()); +} + +void MKLDNNTester::checkForward() { + printTopDatas(); + double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), + testLayers_[REF]->getOutputValue()); + VLOG(MKLDNN_ALL) << "Check Forward"; + EXPECT_LE(fabs(delta), eps_); +} + +void MKLDNNTester::checkBackwardData() { + // TODO(TJ): uncomment me when batch norm ready + // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + const MatrixPtr& dnnDiff = 
dataLayers_[DNN][i]->getOutputGrad(); + const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); + VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i; + printMatrix(dnnDiff); + VLOG(lvl_) << "Reference Backward Output BotDiff " << i; + printMatrix(refDiff); + + double delta = compareMatrix(dnnDiff, refDiff); + EXPECT_LE(fabs(delta), eps_); + // TODO(TJ): uncomment me when batch norm ready + // if (isBN) { + // // the other two inputs in batch norm are for moving mean and var + // break; + // } + } +} + +void MKLDNNTester::checkBackwardWgts() { + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + vector dnnWgts; // used to temply save mkldnn weights + saveWgt(parameters_[DNN], dnnWgts); + + const MKLDNNLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); + CHECK(dnnlayer); + dnnlayer->convertWeightsToPaddle(); + for (size_t i = 0; i < parameters_[DNN].size(); ++i) { + const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName(); + printVector(dnn); + VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName(); + printVector(ref); + + double delta = compareVector(dnn, ref); + EXPECT_LE(fabs(delta), eps_); + } + + VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; + restoreWgt(dnnWgts, parameters_[DNN]); +} + +void MKLDNNTester::saveWgt(const vector& from, + vector& to) { + const bool useGpu = false; + to.resize(from.size()); + for (size_t i = 0; i < to.size(); ++i) { + const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE); + to[i] = Vector::create(wgt->getSize(), useGpu); + to[i]->copyFrom(*wgt); + } +} + +void MKLDNNTester::restoreWgt(const vector& from, + vector& to) { + CHECK_EQ(from.size(), to.size()); + for (size_t i = 0; i < from.size(); ++i) { + const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE); + wgt->copyFrom(*from[i]); + } +} + +// clear parameters grad +void MKLDNNTester::clearWgtDiffs() { + for (size_t n = 0; n < parameters_.size(); ++n) { + for (size_t i = 0; i < parameters_[n].size(); ++i) { + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } + } + } +} + +void MKLDNNTester::clearBotDiffs() { + // dnn and ref + for (size_t n = 0; n < dataLayers_.size(); ++n) { + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } + } +} + +void MKLDNNTester::clearBotDiffs(int n) { + CHECK_LT(n, NUM); + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } +} + +void MKLDNNTester::clearTopDatas() { + for (size_t i = 0; i < testLayers_.size(); ++i) { + testLayers_[i]->getOutputValue()->zeroMem(); + } +} + +void MKLDNNTester::printTopDatas() { + if (!log_) { + return; + } + + for (int n = 0; n < NUM; ++n) { + VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + printMatrix(testLayers_[n]->getOutputValue()); + } +} + +void MKLDNNTester::printMatrix(const MatrixPtr& m) { + if (!log_) { + return; + } + + std::ostringstream ostr; + m->print(ostr); + VLOG(lvl_) << std::endl << ostr.str(); +} + +void MKLDNNTester::printVector(const VectorPtr& v) { + if (!log_) { + return; + } + + std::ostringstream ostr; + v->print(ostr, v->getSize()); + VLOG(lvl_) << std::endl << ostr.str(); +} + +double MKLDNNTester::getDelta(const real* d1, + const real* d2, + size_t len, + const float 
failRate, + const float thres) { + double delta = 0, sum = 0; + int failCnt = 0; + const double eps = 1e-5; + double maxOut = 0; + for (size_t i = 0; i < len; ++i) { + double ref = fabs(d2[i]); + double diff = fabs(d1[i] - d2[i]); + delta += diff; + sum += ref; + if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { + maxOut = std::max(maxOut, diff / ref); + failCnt++; + } + } + EXPECT_TRUE(std::isnormal(sum)); + EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(delta)); + VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; + return (failCnt / (float)len) > failRate ? maxOut : delta / sum; +} + +double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { + CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); + return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); +} + +double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { + CHECK_EQ(v1->getSize(), v2->getSize()); + return getDelta(v1->getData(), v2->getData(), v1->getSize()); +} + +void MKLDNNTester::runOnce() { + // test forward + randomBotDatas(); + dnnLayer_->forward(PASS_TRAIN); + refLayer_->forward(PASS_TRAIN); + checkForward(); + + // test backward + randomTopDiffs(); + dnnLayer_->backward(nullptr); + refLayer_->backward(nullptr); + checkBackwardData(); + checkBackwardWgts(); + + // clear buffers + // ref code will addto the diff, dnn code will writeto it + // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers + clearBotDiffs(REF); +} + +void MKLDNNTester::run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH, + size_t inputImgW, + size_t iter, + float epsilon, + bool log, + int level) { + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); + ih_ = inputImgH; + iw_ = inputImgW; + iter_ = iter; + eps_ = epsilon; + log_ = log; + lvl_ = level; + + // Firstly test FLAGS_use_mkldnn_wgt = false + FLAGS_use_mkldnn_wgt = false; + // reset and run once + reset(dnn, ref, batchSize); + randomWgtDatas(); + clearWgtDiffs(); + clearBotDiffs(); + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } + + // Then test FLAGS_use_mkldnn_wgt = true + FLAGS_use_mkldnn_wgt = true; + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weights and restart again + vector dnnWgts, refWgts; + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); + + // restart again with flag true + reset(dnn, ref, batchSize); + + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); + + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h new file mode 100644 index 0000000000000000000000000000000000000000..522eeaf24b1949abac057a1e59e9977610be23c0 --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
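getDelta() above reduces two buffers to a single relative error, roughly sum(|dnn - ref|) / sum(|ref|), and only falls back to the worst per-element ratio when more than failRate of the elements exceed thres. A simplified standalone version of the main path (the NaN/Inf guards and failure counting are dropped here):

#include <cmath>
#include <cstddef>
#include <cstdio>

double relativeDelta(const float* d1, const float* d2, std::size_t len) {
  double delta = 0, sum = 0;
  for (std::size_t i = 0; i < len; ++i) {
    delta += std::fabs(d1[i] - d2[i]);   // accumulated absolute difference
    sum += std::fabs(d2[i]);             // accumulated reference magnitude
  }
  return delta / sum;
}

int main() {
  float dnn[] = {1.0f, 2.0f, 3.0f};
  float ref[] = {1.0f, 2.1f, 2.9f};
  printf("%.4f\n", relativeDelta(dnn, ref, 3));  // 0.2 / 6.0 ~= 0.0333
}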
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "LayerGradUtil.h" +#include "paddle/gserver/layers/MKLDNNBase.h" + +namespace paddle { + +/** + * @brief test the functionality of Mkldnnlayers + * refer to paddle original function + */ +class MKLDNNTester { + enum { + DNN = 0, // MKLDNN layer + REF = 1, // Reference layer + NUM = 2, // Number of total + }; + +protected: + std::vector configs_; + vector layerNames_; + vector> dataLayers_; + vector> datas_; + vector layerMaps_; + vector> parameters_; + vector testLayers_; + LayerPtr dnnLayer_, refLayer_; + + /// run some iterations, all the result should pass + size_t iter_; + /// whether to print out the details + bool log_; + /// vlog level to print the matrix details datas + int lvl_; + /// epsilon + float eps_; + /// input image size, default 1 + size_t ih_, iw_; + +public: + explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { + iter_ = iter; + eps_ = epsilon; + log_ = false; + lvl_ = MKLDNN_ALL; + } + + ~MKLDNNTester() {} + +public: + void run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH = 1, + size_t inputImgW = 1, + size_t iter = 3, + float epsilon = 1e-4, + bool log = false, + int level = MKLDNN_ALL); + void setLogLevel(int lvl) { lvl_ = lvl; } + +private: + void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); + void setInputImgSize(); + void runOnce(); + + void randomWgtDatas(); + void randomBotDatas(); + void randomTopDiffs(); + + void checkForward(); + void checkBackwardData(); + void checkBackwardWgts(); + + void clearWgtDiffs(); + void clearBotDiffs(); + void clearBotDiffs(int n); // clear specific layer + void clearTopDatas(); + + void printTopDatas(); + void printMatrix(const MatrixPtr& m); + void printVector(const VectorPtr& v); + + void saveWgt(const vector& from, vector& to); + void restoreWgt(const vector& from, vector& to); + + double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2); + double compareVector(const VectorPtr& v1, const VectorPtr& v2); + + /** + * Get delta percent + * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the + * max(diff/ref) + * else return sum(abs(a-b)) / sum(abs(b)) + * The return value should smaller than eps when passing. + */ + double getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate = 1e-3, + const float thres = 0.1); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp index a51fe390c74d74cd5f3d07df62b715b239335548..308abe6816428bc0f98ec32e892622fa4a23b1ae 100644 --- a/paddle/gserver/tests/test_KmaxSeqScore.cpp +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -96,6 +96,11 @@ TEST(Layer, kmaxSeqScoreLayer) { MatrixPtr inValue = Matrix::create(subSeqStartPosition.back(), 1, false, false); + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif + for (auto hasSubseq : {false, true}) { vector> groundTruth; inValue->randomizeUniform(); @@ -104,7 +109,7 @@ TEST(Layer, kmaxSeqScoreLayer) { hasSubseq ? 
subSeqStartPosition : seqStartPosition, beamSize); - for (auto useGpu : {false, true}) { + for (auto useGpu : mode) { TestConfig config; config.layerConfig.set_type("kmax_seq_score"); config.layerConfig.set_beam_size(beamSize); diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1d2270df24331914f3a51acc90a518084b3ce4e --- /dev/null +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "MKLDNNTester.h" +#include "ModelConfig.pb.h" + +using namespace paddle; // NOLINT + +DECLARE_bool(thread_local_rand_use_global_seed); +DECLARE_bool(use_gpu); +DECLARE_bool(use_mkldnn); + +struct testFCDesc { + int bs; + int ic; + int oc; + int ih, iw; // oh == ow == 1 +}; + +void testFcLayer(const testFCDesc& pm) { + const std::string compareTypes[] = {"mkldnn_fc", "fc"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_size(pm.oc); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); + cfg.layerConfig.add_inputs(); + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, FcLayer) { + testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); + testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11}); + testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16}); + testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); +} + +// TODO(TJ): add branch test + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + FLAGS_use_gpu = false; + FLAGS_use_mkldnn = true; + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 9981de61606bda6baac103592125b929d4c12a3d..bf28092e82b778dc904c5a2e271f76261cf5f6b6 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,13 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . 
*.cpp) set(MATH_SOURCES - "${PROJ_ROOT}/paddle/math/BaseMatrix.cu" - "${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" ${MATH_SOURCES}) if(NOT WITH_GPU) # then compile BaseMatrix.cu as c++ file - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/BaseMatrix.cu") - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu") add_library(paddle_math STATIC ${MATH_SOURCES}) else() diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index 860cad1047fc343b13efa901186ea218d0855151..36d57bbb65245de6b0de5909b55fbc4be3eccd78 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -302,6 +302,10 @@ public: bool isSparse() const { return true; } private: + using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index f6cd5df338965b55ca17636de097d2401dc057f9..16300db081f89182faa82ea5798e8ec2f1cd93f9 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -231,6 +231,9 @@ public: private: using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 9e4026d1c66ccd30ecfc37b3e819241cb85b1a1a..373611cc0ee952de813f01d32d1516e1a8384750 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,31 +41,29 @@ function(op_library TARGET) endif() endfunction() +add_subdirectory(math) +cc_test(gather_test SRCS gather_test.cc DEPS tensor) + cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) op_library(add_op SRCS add_op.cc add_op.cu) -cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) op_library(mean_op SRCS mean_op.cc mean_op.cu) -cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op) -op_library(mul_op SRCS mul_op.cc mul_op.cu) +op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) +op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op) -op_library(fc_op - SRCS fc_op.cc - DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS op_desc tensor op_registry operator net_op) + DEPS framework_proto tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 086245ef62d759ab20a3684ddbc015f6c6258639..c1f647a88e4547d96bbb9143cdb2cb07bc291635 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -18,16 +18,15 @@ namespace paddle 
{ namespace operators { class AddOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set"); - PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, - "Outputs of AddOp must all be set"); - PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), - "Two input of Add Op's dimension must be same."); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Two input of Add Op's dimension must be same."); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); } }; @@ -47,6 +46,9 @@ The equation is: Out = X + Y }; class AddOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override {} }; diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index d76c10957e943deb970b1d79a1507a36669314e3..a7307b6818aa3d10ff215d06281e2b53196fd101 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -28,9 +28,9 @@ template class AddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input0 = context.Input(0); - auto input1 = context.Input(1); - auto output = context.Output(0); + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc deleted file mode 100644 index bf529defb20d27200a28666278db8607b986e2d5..0000000000000000000000000000000000000000 --- a/paddle/operators/add_op_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
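The repeated `using framework::OperatorWithKernel::OperatorWithKernel;` lines in this patch rely on C++11 inheriting constructors: each operator re-exposes the base-class constructors instead of declaring its own boilerplate ones. A minimal standalone analogue of the mechanism (the type names here are illustrative only):

#include <cstdio>

struct OperatorBase {
  explicit OperatorBase(int id) : id_(id) {}
  int id_;
};

struct AddLikeOp : OperatorBase {
  using OperatorBase::OperatorBase;  // AddLikeOp(int) now exists without boilerplate
};

int main() {
  AddLikeOp op(42);
  printf("%d\n", op.id_);  // 42
}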
*/ - -#include -#define private public -#include "paddle/framework/op_registry.h" - -USE_OP(add_two); - -TEST(AddOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("add_two"); - ASSERT_NE(it, protos.end()); - auto& op_creators = paddle::framework::OpRegistry::op_creators(); - auto it1 = op_creators.find("add_two_grad"); - ASSERT_NE(it1, op_creators.end()); -} diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index c813d54e17fa48aa4447ef76b918b7355be52b09..597c71d4e042e6b6a752c0b1819b909a7a9faa75 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -18,26 +18,25 @@ namespace paddle { namespace operators { class OnehotCrossEntropyOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, - "Input size of OnehotCrossEntropyOp must be two"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, - "Output size of OnehotCrossEntropyOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "0-th input of OnehotCrossEntropyOp should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), - "1-th input of OnehotCrossEntropyOp should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), - "Outputs of OnehotCrossEntropyOp must all be set"); - PADDLE_ENFORCE_EQ(ctx.Input(0)->dims().size(), 2); - PADDLE_ENFORCE_EQ(ctx.Output(0)->dims().size(), 1, - "label's dimension must be 1."); - ctx.Output(0)->Resize({ctx.Input(0)->dims()[0]}); + auto *X = ctx.Input("X"); + auto *label = ctx.Input("label"); + + PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2."); + PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1."); + PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]); + ctx.Output("Y")->Resize({X->dims()[0]}); } }; class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index d1bbc2cb66d6ce84ddcdcb87648f23c6ce77b748..b7df92c9a98ebf12b72a8d3d8e8e4e1a950f06c9 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -45,7 +45,7 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); const T* Xdata = X->data(); - const int* label_data = ctx.Input(1)->data(); + const int* label_data = ctx.Input("label")->data(); auto Y = ctx.Output("Y"); Y->mutable_data(ctx.GetPlace()); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc deleted file mode 100644 index 01a1a81206f160386467b3c789a41206d89576b6..0000000000000000000000000000000000000000 --- a/paddle/operators/fc_op.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/net_op.h" - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using OpRegistry = framework::OpRegistry; - -class FullyConnectedOp : public NetOp { - public: - void Init() override { - AddOp(OpRegistry::CreateOp("mul", - { - Input("X"), Input("W"), - }, - {Output("before_act")}, {})); - auto b = Input("b"); - if (b != framework::kEmptyVarName) { - AddOp(OpRegistry::CreateOp("rowwise_add", - {Output("before_act"), Input("b")}, - {Output("before_act")}, {})); - } - - auto activation = GetAttr("activation"); - AddOp(OpRegistry::CreateOp(activation, {Output("before_act")}, - {Output("Y")}, {})); - CompleteAddOp(false); - } -}; - -class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { - public: - FullyConnectedOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "the input of fc operator"); - AddInput("W", "the weight of fc operator"); - AddInput("b", "the bias of fc operator"); - - AddOutput("Y", "the output of fc operator"); - AddOutput("before_act", "the before activation output of fc operator") - .SetTemporary(); - AddAttr("activation", "The activation key for fc layer") - .SetDefault("sigmoid") - .InEnum({"sigmoid", "softmax"}); - - //! TODO(yuyang18): Complete comment; - AddComment("FullyConnected Operator"); - } -}; -} // namespace operators -} // namespace paddle - -USE_OP(mul); -USE_OP(rowwise_add); -USE_OP(sigmoid); -USE_OP(softmax); - -namespace ops = paddle::operators; -REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 3759a886780e555ccdc6286c4b200a5d14214691..e42e33f1a3759ae26cee987d0b68a55b672e3f94 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -18,18 +18,13 @@ namespace paddle { namespace operators { class FillZerosLikeOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, - "Input size of FillZerosLikeOp must be one."); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Output size of AddOp must be one."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "Input of FillZerosLikeOp must be set."); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), - "Output of FillZerosLikeOp must be set."); - ctx.Output(0)->Resize( - ctx.Input(0)->dims()); + ctx.Output("Dst")->Resize( + ctx.Input("Src")->dims()); } }; diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index f846c7a8ab15e2cd997564edb36660a1360227a8..fd380ca8514b0ac50f39613368a4836bd485668b 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -23,7 +23,7 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - 
auto* output = context.Output(0); + auto* output = context.Output("Dst"); output->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*output); t.device(context.GetEigenDevice()) = t.constant(T(0)); diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h new file mode 100644 index 0000000000000000000000000000000000000000..0c73717d38aca9f3430e66cafc3ecccdd2eec776 --- /dev/null +++ b/paddle/operators/gather.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace operators { + +// Implementation of CPU copy +template +void CPUGather(const T* params, const int* indices, const int slice_size, + const int index_size, T* output) { + const size_t slice_bytes = slice_size * sizeof(T); + + for (size_t i = 0; i < index_size; ++i) { + int index_ = indices[i]; + memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); + } +} + +// Implementation of GPU copy: +template +void GPUGather(const T* src, const int* index, const int slice_size, + const int index_size, T* output); + +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +void Gather(const platform::Place& place, const paddle::framework::Tensor* src, + const paddle::framework::Tensor* index, + paddle::framework::Tensor* output) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size() == 1); + int index_size = index->dims()[0]; + + auto src_dims = src->dims(); + paddle::framework::DDim output_dims(src_dims); + output_dims[0] = index_size; + + // slice size + int slice_size = 1; + for (size_t i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + // Gathering + if (platform::is_cpu_place(place)) { + CPUGather(src->data(), index->data(), slice_size, index_size, + output->data()); + } +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5de748ec461e4b1a34b75b57c9cd7d5bc9326059 --- /dev/null +++ b/paddle/operators/gather_test.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/gather.h" +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" + +#include +#include +#include + +TEST(Gather, GatherData) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::operators; + + Tensor* src = new Tensor(); + Tensor* index = new Tensor(); + Tensor* output = new Tensor(); + + int* p_src = nullptr; + int* p_index = nullptr; + p_src = src->mutable_data(make_ddim({3, 4}), CPUPlace()); + p_index = index->mutable_data(make_ddim({2}), CPUPlace()); + + for (size_t i = 0; i < 12; ++i) p_src[i] = i; + p_index[0] = 1; + p_index[1] = 0; + + int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); + + Gather(CPUPlace(), src, index, output); + + for (size_t i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); + for (size_t i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); +} diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..75249c08eb00095615fc75eb9261432d64246b2e --- /dev/null +++ b/paddle/operators/gaussian_random_op.cc @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + // TODO(dzh): attribute does not support unsigned int. + // And we need a global random seed configuration. + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + std::mt19937 g(seed); + std::normal_distribution distribution(mean, std); + ssize_t size = framework::product(tensor->dims()); + for (int i = 0; i < size; ++i) { + data[i] = distribution(g); + } + } +}; + +class GaussianRandomOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& context) const override { + auto* tensor = context.Output(0); + auto dims = GetAttr>("dims"); + PADDLE_ENFORCE(dims.size() > 0UL, + "dims can be one int or array. dims must be set."); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GaussianRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "output matrix of random op"); + AddComment(R"DOC( +GaussianRandom operator. +Use to initialize tensor with gaussian random generator. 
+)DOC"); + + AddAttr>("dims", "The dimension of random tensor."); + AddAttr("mean", "mean value of random.").SetDefault(.0f); + AddAttr("std", "minimum value of random value.").SetDefault(1.0f); + AddAttr("seed", + "Random seed of generator." + "0 means use system wide seed") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker); +REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..1340b1e1e9f19fd96ced9e57fab75fe9d33bc84e --- /dev/null +++ b/paddle/operators/gaussian_random_op.cu @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/platform/dynload/curand.h" +#include "paddle/platform/gpu_info.h" + +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + std::random_device rd; + seed = rd(); + } + curandGenerator_t g; + PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( + &g, CURAND_RNG_PSEUDO_DEFAULT)); + PADDLE_ENFORCE( + platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); + platform::dynload::curandGenerateNormal( + g, data, framework::product(tensor->dims()), mean, std); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..abcaf940ab0128d6948acc620d678632c8f48960 --- /dev/null +++ b/paddle/operators/math/CMakeLists.txt @@ -0,0 +1,13 @@ +if(WITH_MKLML) + set(BLAS_LIB mklml) +else() + set(BLAS_LIB cblas) +endif() + +if(WITH_GPU) + nv_library(math_function SRCS math_function.cc math_function.cu DEPS ${BLAS_LIB} device_context) +else() + cc_library(math_function SRCS math_function.cc DEPS ${BLAS_LIB} device_context) +endif() + +nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc new file mode 100644 index 0000000000000000000000000000000000000000..affdd1ac2cd486930881ee6b34a4b32f41df7ee9 --- /dev/null +++ b/paddle/operators/math/math_function.cc @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const float alpha, const float* A, + const float* B, const float beta, float* C, + platform::DeviceContext* context) { + int lda = K; + int ldb = N; + int ldc = N; + cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb, + beta, C, ldc); +} + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const double alpha, const double* A, + const double* B, const double beta, + double* C, + platform::DeviceContext* context) { + int lda = K; + int ldb = N; + int ldc = N; + cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb, + beta, C, ldc); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, float alpha, + framework::Tensor* matrix_out, + float beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) && + platform::is_cpu_place(matrix_b.place()) && + platform::is_cpu_place(matrix_out->place()), + "Matrix must all be in CPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, double alpha, + framework::Tensor* matrix_out, + double beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) && + platform::is_cpu_place(matrix_b.place()) && + platform::is_cpu_place(matrix_out->place()), + "Matrix must all be in CPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? 
CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu new file mode 100644 index 0000000000000000000000000000000000000000..da40b27c948918e4997f4a046d2145552296158b --- /dev/null +++ b/paddle/operators/math/math_function.cu @@ -0,0 +1,127 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const float alpha, const float* A, + const float* B, const float beta, float* C, + platform::DeviceContext* context) { + // Note that cublas follows fortran order, so the order is different from + // the cblas convention. + int lda = (transA == CblasNoTrans) ? K : M; + int ldb = (transB == CblasNoTrans) ? N : K; + cublasOperation_t cuTransA = + (transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + cublasOperation_t cuTransB = + (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + + PADDLE_ENFORCE(platform::dynload::cublasSgemm( + reinterpret_cast(context)->cublas_handle(), + cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); +} + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const double alpha, const double* A, + const double* B, const double beta, + double* C, + platform::DeviceContext* context) { + // Note that cublas follows fortran order, so the order is different from + // the cblas convention. + int lda = (transA == CblasNoTrans) ? K : M; + int ldb = (transB == CblasNoTrans) ? N : K; + cublasOperation_t cuTransA = + (transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + cublasOperation_t cuTransB = + (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + PADDLE_ENFORCE(platform::dynload::cublasDgemm( + reinterpret_cast(context)->cublas_handle(), + cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, float alpha, + framework::Tensor* matrix_out, + float beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && + platform::is_gpu_place(matrix_b.place()) && + platform::is_gpu_place(matrix_out->place()), + "Matrix must all be in GPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? 
dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, double alpha, + framework::Tensor* matrix_out, + double beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && + platform::is_gpu_place(matrix_b.place()) && + platform::is_gpu_place(matrix_out->place()), + "Matrix must all be in GPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h new file mode 100644 index 0000000000000000000000000000000000000000..155589fadb3ed9f59160a750d546dd8093a56cbe --- /dev/null +++ b/paddle/operators/math/math_function.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
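In the math_function.cu wrappers above, cuBLAS assumes column-major storage while the tensors are row-major, so the calls pass B before A and swap M and N: computing B^T * A^T in column-major terms yields C^T, whose memory layout is exactly the row-major C. A hedged sketch of that call pattern for the no-transpose case (`RowMajorSgemm` is an illustrative helper; it assumes a valid cublasHandle_t and device pointers):

```cpp
#include <cublas_v2.h>

// Row-major C[M,N] = A[M,K] * B[K,N] via column-major cuBLAS:
// interpret the row-major buffers as column-major transposes and compute
// C^T = B^T * A^T, so the arguments for B come first and N/M are swapped.
cublasStatus_t RowMajorSgemm(cublasHandle_t handle, int M, int N, int K,
                             const float* A, const float* B, float* C) {
  const float alpha = 1.0f, beta = 0.0f;
  return cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                     N, M, K,   // dimensions of C^T
                     &alpha,
                     B, N,      // B is K x N row-major -> ldb = N
                     A, K,      // A is M x K row-major -> lda = K
                     &beta,
                     C, N);     // C is M x N row-major -> ldc = N
}
```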
*/ + +#pragma once +#ifdef PADDLE_USE_MKLML +#include +#include +#include +#endif + +#ifdef PADDLE_USE_MKL +#include +#include +#endif + +#ifdef PADDLE_USE_ATLAS +extern "C" { +#include +#include +} +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#include +#endif + +#ifndef LAPACK_FOUND +extern "C" { +#include +int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, + int* ipiv); +int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, + int* ipiv); +int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, + const int* ipiv); +int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, + const int* ipiv); +} +#endif + +#include + +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace math { + +// Support continuous memory now +// If transA = N, and transB = N +// Then matrixA: M * K, matrixB: K * N matrixC : M * N +// For more detailed info, please refer to +// http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html +template +void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, + const int M, const int N, const int K, const T alpha, const T* A, + const T* B, const T beta, T* C, platform::DeviceContext* context); + +// matrix multiply with continuous memory +template +void matmul(const framework::Tensor& matrix_a, bool trans_a, + const framework::Tensor& matrix_b, bool trans_b, T alpha, + framework::Tensor* matrix_out, T beta, + platform::DeviceContext* context); + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6c020c4ff7285b43bc5836d80c173d3a068e72b3 --- /dev/null +++ b/paddle/operators/math/math_function_test.cc @@ -0,0 +1,75 @@ +#include "paddle/operators/math/math_function.h" +#include "gtest/gtest.h" + +#ifndef PADDLE_ONLY_CPU +TEST(math_function, notrans_mul_trans) { + paddle::framework::Tensor input1; + paddle::framework::Tensor input1_gpu; + paddle::framework::Tensor input2_gpu; + paddle::framework::Tensor out_gpu; + paddle::framework::Tensor out; + + auto* cpu_place = new paddle::platform::CPUPlace(); + float* input1_ptr = input1.mutable_data({2, 3}, *cpu_place); + float arr[6] = {0, 1, 2, 3, 4, 5}; + memcpy(input1_ptr, arr, 6 * sizeof(float)); + + auto* gpu_place = new paddle::platform::GPUPlace(0); + paddle::platform::DeviceContext* context = + new paddle::platform::CUDADeviceContext(*gpu_place); + + input1_gpu.CopyFrom(input1, *gpu_place); + input2_gpu.CopyFrom(input1, *gpu_place); + + out_gpu.mutable_data({2, 2}, *gpu_place); + + paddle::operators::math::matmul( + input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0, context); + + out.CopyFrom(out_gpu, *cpu_place); + + float* out_ptr = out.data(); + EXPECT_EQ(out_ptr[0], 5); + EXPECT_EQ(out_ptr[1], 14); + EXPECT_EQ(out_ptr[2], 14); + EXPECT_EQ(out_ptr[3], 50); +} + +TEST(math_function, trans_mul_notrans) { + paddle::framework::Tensor input1; + paddle::framework::Tensor input1_gpu; + paddle::framework::Tensor input2_gpu; + paddle::framework::Tensor out_gpu; + paddle::framework::Tensor out; + + auto* cpu_place = new paddle::platform::CPUPlace(); + float* input1_ptr = input1.mutable_data({2, 3}, *cpu_place); + float arr[6] = {0, 1, 2, 3, 4, 5}; + memcpy(input1_ptr, arr, 6 * sizeof(float)); + + auto* gpu_place = new paddle::platform::GPUPlace(0); + 
paddle::platform::DeviceContext* context = + new paddle::platform::CUDADeviceContext(*gpu_place); + + input1_gpu.CopyFrom(input1, *gpu_place); + input2_gpu.CopyFrom(input1, *gpu_place); + + out_gpu.mutable_data({3, 3}, *gpu_place); + + paddle::operators::math::matmul( + input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0, context); + + out.CopyFrom(out_gpu, *cpu_place); + + float* out_ptr = out.data(); + EXPECT_EQ(out_ptr[0], 9); + EXPECT_EQ(out_ptr[1], 12); + EXPECT_EQ(out_ptr[2], 15); + EXPECT_EQ(out_ptr[3], 12); + EXPECT_EQ(out_ptr[4], 17); + EXPECT_EQ(out_ptr[5], 22); + EXPECT_EQ(out_ptr[6], 15); + EXPECT_EQ(out_ptr[7], 22); + EXPECT_EQ(out_ptr[8], 29); +} +#endif diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 997b0c514e96467dc9f9027829616c7b16fe43e1..35e7212dde210a50285272cfd94118fa34fb7cd9 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -18,13 +18,14 @@ namespace paddle { namespace operators { class MeanOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set"); - ctx.Output(0)->Resize(framework::make_ddim({1})); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input of MeanOp must be initialized."); + ctx.Output("Out")->Resize({1}); } }; @@ -33,15 +34,18 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); - AddOutput("Out", "The output of mean op").IgnoreGradient(); + AddOutput("Out", "The output of mean op").AsNoGradient(); AddComment("Mean Operator"); } }; class MeanGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index f3db0a29bb234948d180d964fb82057632ec4414..fcb703e63bd5a82f9ffac2bf64e06fd0218dbdaa 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -31,14 +31,14 @@ template class MeanKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); auto X = EigenVector::Flatten(*input); auto y = EigenScalar::From(*output); - auto place = context.GetEigenDevice(); + auto& place = context.GetEigenDevice(); y.device(place) = X.mean(); } @@ -48,10 +48,10 @@ template class MeanGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto OG = context.Input("Out" + framework::kGradVarSuffix); + auto OG = context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); - auto IG = 
context.Output("X" + framework::kGradVarSuffix); + auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index db81fd555d1c7bea7c0c3bbd70266b4952ed3724..032d234197c12fe107fb195e862c160948ee354c 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -13,16 +13,19 @@ limitations under the License. */ #include "paddle/operators/mul_op.h" +#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); - auto dim0 = ctx.Input(0)->dims(); - auto dim1 = ctx.Input(1)->dims(); + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); PADDLE_ENFORCE_EQ(dim0.size(), 2, "input X(%s) should be a tensor with 2 dims, a matrix", ctx.op_.Input("X")); @@ -32,8 +35,7 @@ class MulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim0[1], dim1[0], "First matrix's width must be equal with second matrix's height."); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "The mul op takes only one output"); - ctx.Output(0)->Resize({dim0[0], dim1[1]}); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); } }; @@ -53,6 +55,9 @@ The equation is: Out = X * Y }; class MulOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 43debbc21a365a15c914e60e151f7782b82080cb..346a7e505d123b5e4e831daa39a1f6349b3dcccf 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -16,5 +16,4 @@ #include "paddle/operators/mul_op.h" namespace ops = paddle::operators; - REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index ab12631c03453a18fbb067e2d12c2bc332acd567..b7812fd1a7a72f5ce543e18c8b7b5b51deff2204 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,6 +13,9 @@ limitations under the License. 
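The mean gradient kernel above divides the single upstream gradient by the element count and assigns that share to every input element, i.e. dX[i] = dOut / N. A plain-vector sketch of the rule, outside the Eigen machinery (`MeanGrad` is an illustrative name):

```cpp
#include <vector>

// dX[i] = dOut / N for the mean op: every input contributes 1/N to the output,
// so each element receives an equal share of the upstream gradient.
std::vector<float> MeanGrad(float d_out, size_t n) {
  return std::vector<float>(n, d_out / static_cast<float>(n));
}
```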
*/ #pragma once + +#include "paddle/operators/math/math_function.h" + #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -30,17 +33,14 @@ class MulKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; - - auto input0 = context.Input("X"); - auto input1 = context.Input("Y"); - auto output = context.Output(0); - + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); - auto X = EigenMatrix::From(*input0); auto Y = EigenMatrix::From(*input1); auto Z = EigenMatrix::From(*output); - auto place = context.GetEigenDevice(); + auto& place = context.GetEigenDevice(); Z.device(place) = X.contract(Y, dim_pair); } diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a466c4f30fe87db4ad2a44518e083b57f3cbc2ed..1d1b290440ec125bdb5b190745735dd077261731 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -15,48 +15,42 @@ */ #include "paddle/operators/net_op.h" +#include +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +const char NetOp::kAll[] = "all"; + void NetOp::CompleteAddOp(bool calc) { add_op_done_ = true; if (!calc) return; - std::unordered_set input_set; - std::unordered_set output_set; - std::unordered_set temp_output; + std::set input_set; + std::set output_set; for (auto& op : ops_) { for (auto& ipt : op->inputs_) { - if (!Contains(output_set, ipt)) { // Not other op's output - input_set.insert(ipt); - } else { - temp_output.insert(ipt); + for (auto& var_name : ipt.second) { + if (!Contains(output_set, var_name)) { // Not other op's output + input_set.insert(var_name); + } else { + intermediate_outputs_.insert(var_name); + } } } for (auto& opt : op->outputs_) { - output_set.insert(opt); - } - } - - inputs_.reserve(input_set.size()); - std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs_)); - std::sort(inputs_.begin(), inputs_.end()); - - outputs_.reserve(output_set.size()); - std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs_)); - std::sort(outputs_.begin(), outputs_.end()); - - std::vector tmp_index; - tmp_index.reserve(temp_output.size()); - int output_len = static_cast(outputs_.size()); - for (int i = 0; i < output_len; ++i) { - if (Contains(temp_output, outputs_[i])) { - tmp_index.push_back(i); + for (auto& var_name : opt.second) { + output_set.insert(var_name); + } } } - - attrs_["temporary_index"] = tmp_index; + auto& inputs = inputs_[kAll]; + inputs.reserve(input_set.size()); + std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs)); + auto& outputs = outputs_[kAll]; + outputs.reserve(output_set.size()); + std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs)); } std::string NetOp::DebugString() const { @@ -73,5 +67,25 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } +std::vector NetOp::OutputVars(bool has_intermediate) const { + if (has_intermediate) { + return this->outputs_.at(kAll); + } + auto& all = this->outputs_.at(kAll); + std::vector ret_val; + for (auto& each : all) { + if (!Contains(intermediate_outputs_, each)) { + ret_val.push_back(each); + } + } + return ret_val; +} + +NetOp::NetOp(const std::string& type, + const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const 
framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + } // namespace operators } // namespace paddle diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 792b336675fc97659d9a23358cf3d48ede56e54e..4a3408c158a029a96740717280c1562671fa938f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include "paddle/framework/framework.pb.h" #include "paddle/framework/op_registry.h" namespace paddle { @@ -35,6 +36,11 @@ namespace operators { */ class NetOp : public framework::OperatorBase { public: + static const char kAll[]; + NetOp() : framework::OperatorBase("plain_net", {}, {}, {}) {} + NetOp(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const framework::AttributeMap& attrs); + /** * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch @@ -90,11 +96,13 @@ class NetOp : public framework::OperatorBase { std::string DebugString() const override; bool IsNetOp() const override; + std::vector OutputVars(bool has_intermediate) const override; std::vector> ops_; private: bool add_op_done_{false}; + std::set intermediate_outputs_; template static bool Contains(T container, KeyType key) { diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 76bf79f9b51fd759da2d02cd90fa458a32be4178..f7aa56262ef71c24bf668950f6e9914e5f96ff70 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -12,6 +12,7 @@ static int run_cnt = 0; class TestOp : public framework::OperatorBase { public: + using framework::OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override { @@ -21,6 +22,7 @@ class TestOp : public framework::OperatorBase { class EmptyOp : public framework::OperatorBase { public: + using framework::OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; @@ -42,40 +44,32 @@ TEST(OpKernel, all) { auto net = std::make_shared(); ASSERT_NE(net, nullptr); - auto op1 = std::make_shared(); - op1->inputs_ = {"x", "w1", "b1"}; - op1->outputs_ = {"y"}; + auto op1 = std::shared_ptr( + new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, + {{"Out", {"y"}}}, {})); net->AddOp(op1); - auto op2 = std::make_shared(); - op2->inputs_ = {"y", "w2", "b2"}; - op2->outputs_ = {"z"}; + auto op2 = std::shared_ptr( + new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, + {{"Out", {"z"}}}, {})); net->AddOp(op2); net->CompleteAddOp(); - AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, net->inputs_); - AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_); - auto tmp_idx_iter = net->attrs_.find("temporary_index"); - ASSERT_NE(net->attrs_.end(), tmp_idx_iter); - auto& tmp_idx = boost::get>(tmp_idx_iter->second); - ASSERT_EQ(1UL, tmp_idx.size()); - ASSERT_EQ("y", net->outputs_[tmp_idx[0]]); + AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, + net->inputs_.at(NetOp::kAll)); + AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_.at(NetOp::kAll)); - Scope scope; - platform::CPUDeviceContext dev_ctx; + auto final_outs = net->OutputVars(false); - net->InferShape(scope); - net->Run(scope, dev_ctx); - ASSERT_EQ(2, infer_shape_cnt); - ASSERT_EQ(2, run_cnt); - ASSERT_THROW(net->AddOp(op2), 
platform::EnforceNotMet); + ASSERT_EQ(final_outs.size(), 1UL); + ASSERT_EQ(final_outs[0], "z"); } TEST(NetOp, insert_op) { NetOp net; - auto op1 = std::make_shared(); - op1->inputs_ = {"x", "w1", "b1"}; - op1->outputs_ = {"y"}; + auto op1 = std::shared_ptr( + new EmptyOp("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, + {{"Out", {"y"}}}, {})); net.AddOp(op1); net.InsertOp(0, op1); ASSERT_EQ(2UL, net.ops_.size()); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 243837420562634c3d99fd0acf234ebd53539735..5ddee75581824996fd312f8ddf13007759fd9a67 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -91,12 +91,17 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // create step net's temp inputs for (auto& input : net_op->inputs_) { // the weight are located in parent scope - if (!step_scope.FindVar(input)) - step_scope.NewVar(input)->GetMutable(); + for (auto& var_name : input.second) { + if (!step_scope.FindVar(var_name)) { + step_scope.NewVar(var_name)->GetMutable(); + } + } } // create stepnet's outputs for (const auto& output : net_op->outputs_) { - step_scope.NewVar(output); + for (auto& var_name : output.second) { + step_scope.NewVar(var_name); + } } step_scopes->emplace_back(&step_scope); } @@ -130,8 +135,11 @@ const rnn::ArgumentName RecurrentGradientOp::kArgName{ "inlink@grad", "inlink_alias", "outlink_alias", "memories", "pre_memories", "boot_memories@grad"}; -void RecurrentOp::Init() { - OperatorBase::Init(); +RecurrentOp::RecurrentOp(const std::string& type, + const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { std::unique_ptr arg(new rnn::Argument()); rnn::InitArgument(kArgName, arg.get(), *this); alg_.Init(std::move(arg)); @@ -147,13 +155,13 @@ class RecurrentAlgorithmProtoAndCheckerMaker // inputs and outputs stored in proto AddInput(name.inlinks, "the inputs that need to be segmented for each step.") - .SetMultiple(); + .AsDuplicable(); AddInput(name.boot_memories, "variables to initialize memories.") - .SetMultiple(); + .AsDuplicable(); AddInput(name.step_net, "network shared by all steps."); AddOutput(name.outlinks, "the outputs that need to concated for all steps.") - .SetMultiple(); + .AsDuplicable(); AddOutput(name.step_scopes, "step scopes"); // Attributes stored in AttributeMap @@ -225,8 +233,11 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); } -void RecurrentGradientOp::Init() { - OperatorBase::Init(); +RecurrentGradientOp::RecurrentGradientOp( + const std::string& type, const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { std::unique_ptr arg(new rnn::Argument()); rnn::InitArgument(kArgName, arg.get(), *this); alg_.Init(std::move(arg)); diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index d1e60fed9cef3c6dccba3ad498fc3658a177b3f7..8f4f2444d844b4ba5948f001a365a7ecaeecc106 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -101,11 +101,11 @@ class RecurrentGradientAlgorithm { class RecurrentOp final : public framework::OperatorBase { public: - void Init() override; - + RecurrentOp(const std::string& type, const VarNameMap& inputs, + const 
VarNameMap& outputs, const framework::AttributeMap& attrs); /** - * InferShape must be called before Run. - */ + * InferShape must be called before Run. + */ void InferShape(const framework::Scope& scope) const override { alg_.InferShape(scope); } @@ -123,7 +123,9 @@ class RecurrentOp final : public framework::OperatorBase { class RecurrentGradientOp final : public framework::OperatorBase { public: - void Init() override; + RecurrentGradientOp(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, + const framework::AttributeMap& attrs); /** * InferShape must be called before Run. diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 6ce28a2b52e3b90596a68714bfdbc07d2d4814d5..2f6eff0720847fdfa6443d2fc233e92dac2d0fab 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -25,157 +25,7 @@ namespace paddle { namespace operators { -using framework::make_ddim; -using framework::DDim; -using framework::Tensor; -using framework::Variable; -using framework::Scope; -using framework::OpRegistry; - -class RecurrentOpTest : public ::testing::Test { - protected: - virtual void SetUp() override { - CreateGlobalVariables(); - CreateStepNet(); - CreateRNNOp(); - } - - virtual void TearDown() override {} - - void CreateGlobalVariables() { - // create input, and init content - LOG(INFO) << "create global variable x"; - for (auto inlink : std::vector{"x", "x0", "x1", "h"}) { - Variable* x = scope_.NewVar(inlink); - DDim dims = make_ddim(std::vector{ - 10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); - x->GetMutable()->mutable_data(dims, platform::CPUPlace()); - } - // create output alias just for test - for (auto inlink : std::vector{"h@alias"}) { - Variable* x = scope_.NewVar(inlink); - DDim dims = - make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}); - x->GetMutable()->mutable_data(dims, platform::CPUPlace()); - } - - LOG(INFO) << "create global variable w"; - Variable* w = scope_.NewVar("rnn/w"); - w->GetMutable()->mutable_data( - make_ddim(std::vector{30, 30}), platform::CPUPlace()); - - for (auto boot : std::vector{"h_boot"}) { - LOG(INFO) << "create global variable " << boot; - Variable* h_boot = scope_.NewVar(boot); - h_boot->GetMutable()->mutable_data( - make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}), - platform::CPUPlace()); - } - - LOG(INFO) << "create variable step_scopes"; - scope_.NewVar("step_scopes"); - - LOG(INFO) << "create variable h"; - scope_.NewVar("h"); - } - - void CreateRNNOp() { - framework::OpDesc op_desc; - - op_desc.set_type("recurrent_op"); - // inlinks 0 - op_desc.add_inputs("x"); - op_desc.add_inputs("x0"); - op_desc.add_inputs("x1"); - // boot_memories 3 - op_desc.add_inputs("h_boot"); - // step net 5 - op_desc.add_inputs("step_net"); - // outlinks 6 - op_desc.add_outputs("h"); - // step scopes 7 - op_desc.add_outputs("step_scopes"); - - auto _input_format = std::vector{ - 0, // in_link - 3, // memories - 4 // step_net - }; - auto input_format = op_desc.add_attrs(); - input_format->set_name("input_format"); - input_format->set_type(paddle::framework::AttrType::INTS); - for (auto i : _input_format) { - input_format->add_ints(i); - } - - auto output_format = op_desc.add_attrs(); - output_format->set_name("output_format"); - output_format->set_type(paddle::framework::AttrType::INTS); - for (auto i : std::vector{0, 1, 2}) { - output_format->add_ints(i); - } - - auto inlink_alias = op_desc.add_attrs(); - inlink_alias->set_name("inlink_alias"); 
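With the NetOp changes above, a variable produced by one sub-op and consumed by a later sub-op of the same net is recorded as an intermediate output, and OutputVars(false) skips it; that is why the updated net_op_test expects only "z" to remain once "y" feeds the second op. A standalone sketch of that classification using plain containers (`SubOp` and `FinalOutputs` are illustrative names, not the NetOp code):

```cpp
#include <set>
#include <string>
#include <vector>

// Classify net outputs: anything produced by one sub-op and consumed by a
// later one is "intermediate"; only the remaining outputs are exposed.
struct SubOp {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

std::vector<std::string> FinalOutputs(const std::vector<SubOp>& ops) {
  std::set<std::string> produced, intermediate;
  for (const auto& op : ops) {
    for (const auto& in : op.inputs) {
      if (produced.count(in)) intermediate.insert(in);
    }
    for (const auto& out : op.outputs) produced.insert(out);
  }
  std::vector<std::string> finals;
  for (const auto& out : produced) {
    if (!intermediate.count(out)) finals.push_back(out);
  }
  return finals;
}
// For op1: {x, w1, b1} -> {y} and op2: {y, w2, b2} -> {z} this returns {"z"},
// matching the expectation in net_op_test.cc.
```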
- inlink_alias->set_type(paddle::framework::AttrType::STRINGS); - - auto outlink_alias = op_desc.add_attrs(); - outlink_alias->set_name("outlink_alias"); - outlink_alias->set_type(paddle::framework::AttrType::STRINGS); - - auto pre_memories = op_desc.add_attrs(); - pre_memories->set_name("pre_memories"); - pre_memories->set_type(paddle::framework::AttrType::STRINGS); - - auto memories = op_desc.add_attrs(); - memories->set_name("memories"); - memories->set_type(paddle::framework::AttrType::STRINGS); - - // create inlink_alias - for (const auto& item : - std::vector{"x@alias", "x0@alias", "x1@alias"}) { - inlink_alias->add_strings(item); - } - // pre memories - for (const auto& item : std::vector{"rnn/h@pre"}) { - pre_memories->add_strings(item); - } - // memories - for (const auto& item : std::vector{"rnn/h"}) { - memories->add_strings(item); - } - // output alias - for (const auto& item : std::vector{"h@alias"}) { - outlink_alias->add_strings(item); - } - - rnn_op_ = OpRegistry::CreateOp(op_desc); - - LOG(INFO) << "rnn_op finish init"; - } - - void CreateStepNet() { - LOG(INFO) << "create variable step_net"; - Variable* var = scope_.NewVar("step_net"); - auto net = var->GetMutable(); - net->AddOp( - OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {})); - - net->AddOp( - OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {})); - net->CompleteAddOp(); - } - - // father scope - Scope scope_; - std::shared_ptr rnn_op_; -}; - -TEST_F(RecurrentOpTest, Run) { - platform::CPUDeviceContext ctx; - rnn_op_->InferShape(scope_); - rnn_op_->Run(scope_, ctx); -} +using namespace paddle::framework; class RecurrentGradientAlgorithmTest : public ::testing::Test { protected: @@ -281,11 +131,13 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { LOG(INFO) << "create variable step_net"; Variable* var = scope_.NewVar("step_net"); auto net = var->GetMutable(); - net->AddOp(OpRegistry::CreateOp("mul", {"rnn/h_pre", "rnn/w", "rnn/s_grad"}, - {"rnn/h_pre_grad", "rnn/w_grad"}, {})); + // TODO(qingqing) modify backward op create for RNNOp unit test + // and the unit test will be removed to Python. 
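The commented-out AddOp calls below, like the rewritten net_op_test above, use the new constructor form Op(type, inputs, outputs, attrs), where inputs and outputs map each formal parameter name to a list of variable names. A sketch of what those brace-initializers denote (the VarNameMap alias here is assumed to be a name-to-names map; the framework's exact typedef may differ):

```cpp
#include <map>
#include <string>
#include <vector>

// Shape of the inputs/outputs passed to the new op constructors, as implied by
// the brace-initializers in net_op_test.cc: each formal parameter name maps to
// one or more variable names (duplicable parameters get several entries).
using VarNameMap = std::map<std::string, std::vector<std::string>>;

int main() {
  VarNameMap inputs = {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}};
  VarNameMap outputs = {{"Out", {"y"}}};
  // An operator is then built as Op(type, inputs, outputs, attrs),
  // e.g. TestOp("test", inputs, outputs, {}) in the updated unit test.
  return 0;
}
```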
+ // net->AddOp(OpRegistry::CreateOp("mul", {"X", {"rnn/h_pre", "rnn/w", + // "rnn/s_grad"}}, {"Y", {"rnn/h_pre_grad", "rnn/w_grad"}}, {})); - net->AddOp(OpRegistry::CreateOp("add_two", {"rnn/h_grad"}, - {"rnn/x_grad", "rnn/s_grad"}, {})); + // net->AddOp(OpRegistry::CreateOp("add_two", {"X", {"rnn/h_grad"}}, + // {"Y", {"rnn/x_grad"}}, {"Out", "rnn/s_grad"}}, {})); net->CompleteAddOp(); } @@ -359,7 +211,8 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (size_t i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1, false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, memories, i, -1, false + /*infer_shape_mode*/); } // check for (size_t i = 0; i < len - 1; ++i) { @@ -375,7 +228,8 @@ TEST(RecurrentOp, LinkMemories) { } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1, false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, memories, i, 1, false + /*infer_shape_mode*/); } // check for (int i = len - 2; i >= 0; --i) { @@ -395,4 +249,4 @@ TEST(RecurrentOp, LinkMemories) { USE_OP(add_two); USE_OP(mul); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index a012ab0be06449b6e5340a4c4af520d561db5301..0c6ae64d0c81c75223dbeda2c115976f5e86a466 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -18,18 +18,19 @@ namespace paddle { namespace operators { class RowwiseAddOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2UL, - "Two inputs is needed by rowwise add"); - auto dim0 = ctx.Input(0)->dims(); - auto dim1 = ctx.Input(1)->dims(); + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("b")->dims(); PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix"); PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "The output size must be 1"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1"); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 27d7a33e8ad2e87e05d6ec4caf6e484bc7ede054..3ad60172c167fb3e854423a45b7e0c1007647fda 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -31,11 +31,11 @@ template class RowwiseAddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto out = context.Output(0); + auto out = context.Output("Out"); out->mutable_data(context.GetPlace()); - auto input = EigenMatrix::From(*context.Input(0)); - auto bias = EigenVector::From(*context.Input(1)); + auto input = EigenMatrix::From(*context.Input("X")); + auto bias = EigenVector::From(*context.Input("b")); auto output = EigenMatrix::From(*out); const int bias_size = bias.dimension(0); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index f9a28ff8a6a06c5c239c4e6ec21eacb410cc162f..bf76df272b6faaed01ed8d715fe3b547ec7dc4e3 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -18,16 +18,15 @@ namespace paddle { namespace operators { class SGDOp : public 
framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set"); - PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), - "Two input of SGD Op's dimension must be same."); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE( + ctx.Input("param")->dims() == ctx.Input("grad")->dims(), + "Two input of SGD Op's dimension must be same."); + ctx.Output("param_out")->Resize(ctx.Input("param")->dims()); } }; diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc deleted file mode 100644 index 75137259f5e608b259b073101353e5818bb17c92..0000000000000000000000000000000000000000 --- a/paddle/operators/sgd_op_test.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -USE_OP(sgd); -TEST(SGDOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("sgd"); - ASSERT_NE(it, protos.end()); -} diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index bc5e0bbb183f9bdf0a3fa8a5a02499282fbd6b98..a7dfb624e5b779164eb07763eb604c548f6e89e7 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -18,11 +18,12 @@ namespace paddle { namespace operators { class SigmoidOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -38,6 +39,9 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { }; class SigmoidOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 7af879b2091e4a7f80a3a64be029394156650c23..11ab923eb346c1f8de3a6bbebdfa874b6530004a 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -28,8 +28,8 @@ template class SigmoidKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); 
+ auto input = context.Input("X"); + auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); // The clipping is used in Paddle's raw implenmention diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 3dd4e86918a86f408e7867d15b4fdc8f9cbbb5ce..5d8ece1a254a58990bfb2f919567fa43689335b9 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -18,14 +18,13 @@ namespace paddle { namespace operators { class SoftmaxOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, - "Only one input is need for softmax"); - PADDLE_ENFORCE_EQ(ctx.Input("X")->dims().size(), 2UL, - "The input of softmax op must be matrix"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Only one output is need for softmax"); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, + "The input of softmax op must be matrix"); ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -42,13 +41,12 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { }; class SoftmaxOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, - "Input of SoftmaxOpGrad should be 3, X, Y, YG"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Output of SoftmaxOpGrad should be 1"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), "Input(Y@GRAD) should not be null"); PADDLE_ENFORCE(ctx.Input("Y")->dims() == diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 405b84b76d2e24db25d2ff16e99495f2f132ef09..9d668e6085b93bc5a3a06683aa4470f62ae47c02 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -27,7 +27,7 @@ template class CPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.op_.GetAttr("seed")); @@ -46,11 +46,14 @@ class CPUUniformRandomKernel : public framework::OpKernel { }; class UniformRandomOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), "uniform_random's min must less then max"); - auto* tensor = ctx.Output(0); + auto* tensor = ctx.Output("Out"); auto dims = GetAttr>("dims"); tensor->Resize(framework::make_ddim(dims)); } diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index f1a63e52ec0d3d46a505a89d7d7916bf93a58221..7a243555b6385af690e9632dfa81bf96d70f925d 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -46,12 +46,13 @@ template class GPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* tensor = context.Output(0); + 
auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.op_.GetAttr("seed")); if (seed == 0) { - seed = std::random_device()(); + std::random_device rd; + seed = rd(); } T min = static_cast(context.op_.GetAttr("min")); T max = static_cast(context.op_.GetAttr("max")); diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c44b7240a885c2ef71e550df645dbaded69f9944..9d8343c0b5e200b390ccda760f09816959952e9d 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -48,13 +48,13 @@ extern void *cublas_dso_handle; }; \ extern DynLoad__##__name __name #else -#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - inline template \ - cublasStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline cublasStatus_t operator()(Args... args) { \ + return __name(args...); \ + } \ + }; \ extern DynLoad__##__name __name #endif @@ -62,12 +62,12 @@ extern void *cublas_dso_handle; DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ - __macro(cublasSgemv); \ - __macro(cublasDgemv); \ - __macro(cublasSgemm); \ - __macro(cublasDgemm); \ - __macro(cublasSgeam); \ - __macro(cublasDgeam); \ + __macro(cublasSgemv_v2); \ + __macro(cublasDgemv_v2); \ + __macro(cublasSgemm_v2); \ + __macro(cublasDgemm_v2); \ + __macro(cublasSgeam_v2); \ + __macro(cublasDgeam_v2); \ __macro(cublasCreate_v2); \ __macro(cublasDestroy_v2); \ __macro(cublasSetStream_v2); \ diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index d8c46bc41e18d013a80cd0a9116a4b1a52bf5854..7bfe0778c78f6075ec8a284d478a1f9d5ee66ae9 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -55,6 +55,7 @@ extern void *curand_dso_handle; __macro(curandSetPseudoRandomGeneratorSeed); \ __macro(curandGenerateUniform); \ __macro(curandGenerateUniformDouble); \ + __macro(curandGenerateNormal); \ __macro(curandDestroyGenerator); CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index d2adb997de8e36922d5056b20f238a82eee74f8c..337a059fb1494d500be0fd2437e59c863ae1563c 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -15,11 +15,12 @@ limitations under the License. */ #pragma once #include -#include #include #include #include #include +#include "paddle/string/printf.h" +#include "paddle/string/to_string.h" #ifndef PADDLE_ONLY_CPU @@ -194,8 +195,8 @@ inline void throw_on_error(T e) { #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ - #__VAL0, #__VAL1, std::to_string(__VAL0), \ - std::to_string(__VAL1), \ + #__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \ + paddle::string::to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); } // namespace platform diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 5408fce558ab0d9c369aaba22374315fe553ce0f..80bdee3d9dfbe38ef707a6ba60cdb7f7b99714de 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,6 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include +#include #include #include "gtest/gtest.h" @@ -83,7 +85,7 @@ TEST(ENFORCE_NE, FAIL) { } catch (paddle::platform::EnforceNotMet error) { caught_exception = true; EXPECT_TRUE(HasPrefix(StringPiece(error.what()), - "enforce 1.0 != 1UL failed, 1.000000 == 1")) + "enforce 1.0 != 1UL failed, 1 == 1")) << error.what() << " does not have expected prefix"; } EXPECT_TRUE(caught_exception); @@ -176,3 +178,39 @@ TEST(ENFORCE_NOT_NULL, FAIL) { } EXPECT_TRUE(caught_exception); } + +struct Dims { + size_t dims_[4]; + + bool operator==(const Dims& o) const { + for (size_t i = 0; i < 4; ++i) { + if (dims_[i] != o.dims_[i]) return false; + } + return true; + } +}; + +std::ostream& operator<<(std::ostream& os, const Dims& d) { + for (size_t i = 0; i < 4; ++i) { + if (i == 0) { + os << "["; + } + os << d.dims_[i]; + if (i == 4 - 1) { + os << "]"; + } else { + os << ", "; + } + } + return os; +} + +TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { + Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}}; + PADDLE_ENFORCE_EQ(a, b); +} + +TEST(ENFORCE_USER_DEFINED_CLASS, NE) { + Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; + ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); +} \ No newline at end of file diff --git a/paddle/platform/place.h b/paddle/platform/place.h index a82e8c942fa28297d91056a66b61f085f2bdb946..1117476bb37f1b0f3876c55e610803d5ee2558ce 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include #include +#include "paddle/platform/variant.h" namespace paddle { namespace platform { diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h new file mode 100644 index 0000000000000000000000000000000000000000..c2257af1b5dd1a1e284979bf17e1a947072baa85 --- /dev/null +++ b/paddle/platform/variant.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +#ifndef PADDLE_ONLY_CPU + +// Because boost's variadic templates has bug on nvcc, boost will disable +// variadic template support when GPU enabled on nvcc. +// Define BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same +// function symbols. 
+// +// https://github.com/PaddlePaddle/Paddle/issues/3386 +#ifndef BOOST_NO_CXX11_VARIADIC_TEMPLATES +#define BOOST_NO_CXX11_VARIADIC_TEMPLATES +#endif +#endif + +#include diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 6e8f9c37f64b70921e09241089a5a480fd8ca47f..b66a00ba0652dfe1afbb877eca06cacdfe2ca343 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test SocketTest.cpp) add_test(NAME socket_test - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) ####################### test_ProtoServer #################### @@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer IF(NOT ON_TRAVIS) add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) ENDIF(NOT ON_TRAVIS) @@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS) add_unittest_without_exec(test_ParameterServer2 test_ParameterServer2.cpp) add_test(NAME test_ParameterServer2 - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port -n 4 + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4 ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 44442be4729ff77e8d378c93acebe1486eb75397..2f0205b7702b6d73b5348430f39166ec78f6c143 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} - -DWITH_GOLANG=${WITH_GOLANG:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} @@ -51,7 +51,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ - -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ @@ -74,11 +74,11 @@ cat < +#include + +namespace paddle { +namespace string { +template +inline std::string to_string(T v) { + std::ostringstream sout; + sout << v; + return sout.str(); +} + +// Faster std::string/const char* type +template <> +inline std::string to_string(std::string v) { + return v; +} + +template <> +inline std::string to_string(const char* v) { + return std::string(v); +} + +} // namespace string +} // namespace paddle diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ff1b007f1875c7b920a08bd13b8d98cdc5138d3 --- /dev/null +++ b/paddle/string/to_string_test.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
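paddle::string::to_string above converts through an std::ostringstream, so any type with an operator<< can be rendered; that is what lets the enforce macros print user-defined operands, whereas std::to_string only handles arithmetic types. A standalone mirror of the idea with an illustrative Point type (not taken from the patch):

```cpp
#include <sstream>
#include <string>

// Unlike std::to_string, streaming through an ostringstream accepts any type
// that defines operator<<, which is the property the enforce macros rely on.
template <typename T>
std::string ToString(const T& v) {
  std::ostringstream out;
  out << v;
  return out.str();
}

struct Point {  // illustrative user-defined type
  int x, y;
};
std::ostream& operator<<(std::ostream& os, const Point& p) {
  return os << "(" << p.x << ", " << p.y << ")";
}

// ToString(10) == "10", ToString(Point{1, 2}) == "(1, 2)" -- the same behaviour
// the new to_string_test.cc checks for built-in and user-defined types.
```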
*/ + +#include "paddle/string/to_string.h" +#include + +constexpr char kOutputString[] = "User Defined Output"; +class UserDefinedClass { +public: +}; + +std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { + s << kOutputString; + return s; +} + +TEST(to_string, normal) { + using namespace paddle::string; + ASSERT_EQ("10", to_string(10)); + ASSERT_EQ("abc", to_string("abc")); + ASSERT_EQ("1.2", to_string(1.2)); +} + +TEST(to_string, user_defined) { + using namespace paddle::string; + UserDefinedClass instance; + ASSERT_EQ(kOutputString, to_string(instance)); +} \ No newline at end of file diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index af1dceed0284c70d68b61b9682b0cb23c28043d6..35dcb235e7e8b65f7d1623a1ec66d963b1283385 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init( // from parameter server if (paddle_begin_init_params(parameterClient_)) { LOG(INFO) << "paddle_begin_init_params start"; + // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. + // This makes golang pserver compatible with handy V1 demos. + // TODO(wuyi): Refine or remove these ugly converting lines + OptimizerConfig optimizerConfigV2; + if (trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } else if (trainerConfig_.learning_method() == "adagrad") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adagrad()->set_epsilon( + trainerConfig_.ada_epsilon()); + } else if (trainerConfig_.learning_method() == "adadelta") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adadelta()->set_epsilon( + trainerConfig_.ada_epsilon()); + optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou()); + } else if (trainerConfig_.learning_method() == "adam") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam); + optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1()); + optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2()); + optimizerConfigV2.mutable_adam()->set_epsilon( + trainerConfig_.adam_epsilon()); + } else { + LOG(ERROR) << "got unsupported v1 optimizer config: " + << trainerConfig_.learning_method(); + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } + + if (trainerConfig_.learning_rate_schedule() == "constant") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + } else if (trainerConfig_.learning_rate_schedule() == "linear") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear); + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a( + trainerConfig_.learning_rate_decay_a()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b( + trainerConfig_.learning_rate_decay_b()); + } else { + LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: " + << trainerConfig_.learning_rate_schedule() << ", set to const"; + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + } + + // overwrite optimizerConfigV2 for per-parameter(layer) configs for (int i = 0; i < parameterSize(); ++i) { auto paramConfig = parameters_[i]->getConfig(); - LOG(INFO) << "old 
param config: " << paramConfig.DebugString(); - // FIXME(typhoonzero): convert old paramConfig to optimizerConfig - OptimizerConfig optimizeConfigV2; - auto sgdConfigV2 = optimizeConfigV2.mutable_sgd(); - sgdConfigV2->set_momentum(paramConfig.momentum()); - sgdConfigV2->set_decay(paramConfig.decay_rate()); - optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - auto constlr = optimizeConfigV2.mutable_const_lr(); + if (paramConfig.has_momentum() && + trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.mutable_sgd()->set_momentum(paramConfig.momentum()); + } if (paramConfig.has_learning_rate()) { - constlr->set_learning_rate(paramConfig.learning_rate()); - } else { - constlr->set_learning_rate(trainerConfig_.learning_rate()); + switch (optimizerConfigV2.lr_policy()) { + case 0: + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + case 1: + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + } } - if (trainerConfig_.algorithm() == "sgd") { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - // FIXME: config all algorithms - } else { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + if (paramConfig.has_decay_rate()) { + switch (optimizerConfigV2.optimizer()) { + case 1: // SGD + optimizerConfigV2.mutable_sgd()->set_decay( + paramConfig.decay_rate()); + break; + case 2: // Adadelta + optimizerConfigV2.mutable_adadelta()->set_decay( + paramConfig.decay_rate()); + break; + case 3: // Adagrad + optimizerConfigV2.mutable_adagrad()->set_decay( + paramConfig.decay_rate()); + break; + case 4: // Adam + optimizerConfigV2.mutable_adam()->set_decay( + paramConfig.decay_rate()); + break; + } } - std::string bytes = optimizeConfigV2.SerializeAsString(); + // send param and config to pserver + std::string bytes = optimizerConfigV2.SerializeAsString(); const char *array = bytes.data(); int size = (int)bytes.size(); paddle_init_param( diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 133e2be104c6fbfddefd8698d2b6aa8315c56c70..eba40862b926cfe863c569e73a6a3ceabcf1f3b4 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -28,6 +28,8 @@ DECLARE_bool(with_cost); DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn + << ",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 08b2d8a38e2d20a357752269bd3ee3f515116abd..f01ad4142d4fe7c7f7d7aac60d967ea114b93e56 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -2,19 +2,19 @@ add_unittest_without_exec(test_Compare test_Compare.cpp) add_test(NAME test_Compare - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python + COMMAND 
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_Compare - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_Trainer ########################### add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/gen_proto_data.py && - ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && + ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_TrainerOnePass ########################## if(WITH_PYTHON) @@ -23,60 +23,60 @@ if(WITH_PYTHON) add_unittest_without_exec(test_TrainerOnePass test_TrainerOnePass.cpp) add_test(NAME test_TrainerOnePass - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests - ${PROJ_ROOT}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests + ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################ test_CompareTwoNets ###################### add_unittest_without_exec(test_CompareTwoNets test_CompareTwoNets.cpp) add_test(NAME test_CompareTwoNets - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_CompareTwoOpts ################### add_unittest_without_exec(test_CompareTwoOpts test_CompareTwoOpts.cpp) add_test(NAME test_CompareTwoOpts - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --num_passes=1 --need_high_accuracy=0 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_CompareSparse ################## add_unittest_without_exec(test_CompareSparse test_CompareSparse.cpp) if(NOT ON_TRAVIS) add_test(NAME test_CompareSparse - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ./.set_port.sh -p port -n 6 ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################# test_recurrent_machine_generation ############### 
add_unittest_without_exec(test_recurrent_machine_generation test_recurrent_machine_generation.cpp) add_test(NAME test_recurrent_machine_generation - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_PyDataProviderWrapper ######################### add_unittest_without_exec(test_PyDataProviderWrapper test_PyDataProviderWrapper.cpp) add_test(NAME test_PyDataProviderWrapper - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_config_parser ######################### add_test(NAME test_config_parser - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/config_parser_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 320f671ed97dbadc4fa1b4b52d5611cf9239e7dd..600c83a8487191895de635dd8433f6c44e86ce77 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); #endif +#ifdef PADDLE_USE_MKLDNN +// TODO(TJ): change to true when MKLDNN layers support multi-inputs +DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); +#else +DEFINE_bool(use_mkldnn, false, "Only support CPU training"); +#endif + +DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." 
diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index dc4faef8331ed47b9ce3e952389b6469cd9fda2e..0aca4c0ee036ee8490c0ceca7279df876dc21947 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat); DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index aa923b355377752f9b297a125f5c43c364ba9b06..c770ce169878d9998e559b1d417fc1acc88cde97 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -13,6 +13,6 @@ add_executable( link_paddle_exe(test_CustomStackTracePrint) if(NOT APPLE) add_test(NAME test_CustomStackTracePrint - COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index e1cea8bd0de5394020a498725485cea025512e48..6212c2e60a8ed94ecc1d6e58535a2b3d365e3eb8 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -9,13 +9,13 @@ foreach(filename ${proto_filenames}) get_filename_component(ABS_FIL ${filename} ABSOLUTE) get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) + ${PADDLE_SOURCE_DIR}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY ${CUR_PROTO_GEN_PY} ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + ARGS "--python_out=${PADDLE_SOURCE_DIR}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} DEPENDS ${ABS_FIL} protoc) endforeach() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e75eb94e857ae4effc6adb6d19dc0e93..16c519d45aa62694201379b8da1ca54d8a07ee9a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,3 @@ -set(OUTPUT_DIR - "${CMAKE_CURRENT_BINARY_DIR}/build") file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . 
./paddle/trainer_config_helpers/*.py) @@ -18,7 +16,7 @@ SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") add_custom_command(TARGET ${COPY_PADDLE_MASTER} - COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ + COMMAND cp ${paddle_master_LIB_PATH} ${PADDLE_SOURCE_DIR}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) @@ -27,19 +25,21 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so - COMMAND cmake -E copy $ ${PROJ_ROOT}/python/paddle/v2/framework/core.so +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so + COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so DEPENDS paddle_pybind) -add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) +add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) -add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp +add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS - ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) + ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7b696ef0c13e1bae2e910e08d1a1ea3e45cd5d5..da99e5bd53458aa0cb201a3525e28c66ab63c52d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): + layer_type = 'fc' + def __init__(self, name, size, @@ -1611,14 +1613,27 @@ class FCLayer(LayerBase): bias=True, error_clipping_threshold=None, **xargs): - super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) + if use_mkldnn: + self.layer_type = 'mkldnn_fc' + config_assert( + len(inputs) == 1, + "MkldnnFCLayer support one and only one input!") + super(FCLayer, self).__init__( + name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" - + if use_mkldnn: + config_assert(not sparse, + "MkldnnFCLayer do not support sparse format yet") + if use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] if sparse: psize = self.inputs[input_index].nnz else: @@ -1631,6 
+1646,11 @@ class FCLayer(LayerBase): self.config.error_clipping_threshold = error_clipping_threshold +@config_layer('mkldnn_fc') +class MkldnnFcLayer(FCLayer): + layer_type = 'mkldnn_fc' + + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt index 6c860fd49702ebc93612114011361efb885c62ec..580aef935b5cec385a88fb0b4f5b9a5ddeddb40c 100644 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -1,17 +1,17 @@ #################### test_config_parser ######################### add_test(NAME layers_test - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_test(NAME test_reset_hook - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp) add_test(NAME test_layerHelpers COMMAND - ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} + ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal ) diff --git a/python/paddle/v2/framework/.gitignore b/python/paddle/v2/framework/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..2ff540d5764b76cf7bac64fc2bb9df6e9c1b398a --- /dev/null +++ b/python/paddle/v2/framework/.gitignore @@ -0,0 +1 @@ +proto diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 7fd8b55a5d167294d3270c79f7b64da03443afd3..904de08da4efa4df49cdc1e391e2674608a4e84b 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -1,7 +1,5 @@ import paddle.v2.framework.core as core -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 -import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 -import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2 +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 def get_all_op_protos(): @@ -12,11 +10,15 @@ def get_all_op_protos(): protostrs = core.get_all_op_protos() ret_values = [] for pbstr in protostrs: - op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) + op_proto = framework_pb2.OpProto.FromString(str(pbstr)) ret_values.append(op_proto) return ret_values +def is_str(s): + return isinstance(s, str) or isinstance(s, unicode) + + class OpDescCreationMethod(object): """ A Functor object to convert user input(use key word args) to OpDesc based on @@ -27,7 +29,7 @@ class OpDescCreationMethod(object): """ def __init__(self, op_proto): - if not isinstance(op_proto, op_proto_pb2.OpProto): + if not 
isinstance(op_proto, framework_pb2.OpProto): raise TypeError("Argument should be OpProto") self.__op_proto__ = op_proto @@ -39,26 +41,34 @@ class OpDescCreationMethod(object): """ if len(args) != 0: raise ValueError("Only keyword arguments is supported by Paddle") - op_desc = op_desc_pb2.OpDesc() - - # Inputs - ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output( - "input", kwargs, self.__op_proto__.inputs) - op_desc.inputs.extend(ipts) - if ipt_format is not None: - op_desc.attrs.extend([ipt_format]) - - # Outputs - outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output( - "output", kwargs, self.__op_proto__.outputs) - op_desc.outputs.extend(outs) - if out_format is not None: - op_desc.attrs.extend([out_format]) - if len(tmp_index) != 0: - tmp_index_attr = op_desc.attrs.add() - tmp_index_attr.type = attribute_pb2.INTS - tmp_index_attr.name = "temporary_index" - tmp_index_attr.ints.extend(tmp_index) + op_desc = framework_pb2.OpDesc() + + for input_parameter in self.__op_proto__.inputs: + input_arguments = kwargs.get(input_parameter.name, []) + if is_str(input_arguments): + input_arguments = [input_arguments] + + if not input_parameter.duplicable and len(input_arguments) > 1: + raise ValueError("Input %s only accepts one input, but give %d" + % (input_parameter.name, len(input_arguments))) + + ipt = op_desc.inputs.add() + ipt.parameter = input_parameter.name + ipt.arguments.extend(input_arguments) + + for output_parameter in self.__op_proto__.outputs: + output_arguments = kwargs.get(output_parameter.name, []) + if is_str(output_arguments): + output_arguments = [output_arguments] + + if not output_parameter.duplicable and len(output_arguments) > 1: + raise ValueError( + "Output %s only accepts one output, but give %d" % + (output_parameter.name, len(output_arguments))) + + out = op_desc.outputs.add() + out.parameter = output_parameter.name + out.arguments.extend(output_arguments) # Types op_desc.type = self.__op_proto__.type @@ -72,17 +82,17 @@ class OpDescCreationMethod(object): new_attr = op_desc.attrs.add() new_attr.name = attr.name new_attr.type = attr.type - if attr.type == attribute_pb2.INT: + if attr.type == framework_pb2.INT: new_attr.i = user_defined_attr - elif attr.type == attribute_pb2.FLOAT: + elif attr.type == framework_pb2.FLOAT: new_attr.f = user_defined_attr - elif attr.type == attribute_pb2.STRING: + elif attr.type == framework_pb2.STRING: new_attr.s = user_defined_attr - elif attr.type == attribute_pb2.INTS: + elif attr.type == framework_pb2.INTS: new_attr.ints.extend(user_defined_attr) - elif attr.type == attribute_pb2.FLOATS: + elif attr.type == framework_pb2.FLOATS: new_attr.floats.extend(user_defined_attr) - elif attr.type == attribute_pb2.STRINGS: + elif attr.type == framework_pb2.STRINGS: new_attr.strings.extend(user_defined_attr) else: raise NotImplementedError("Not support attribute type " + @@ -90,50 +100,6 @@ class OpDescCreationMethod(object): return op_desc - @staticmethod - def extract_input_or_output(in_out, kwargs, meta): - """ - Extract input variable names or output variable names from key-word - arguments, which base on VarProtos. - - :param in_out: "input" or "output" - :param kwargs: key-word arguments that user inputted. - :param meta: a list of VarProto - :return: The three object will be return. The variable names. The - input_format or output_format attribute(None if the input or output is - not multiple). The temporary variable index list. 
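The rewritten OpDescCreationMethod.__call__ above maps every OpProto input and output slot to a (parameter, arguments) pair: a bare string argument is promoted to a one-element list, and a slot that is not duplicable rejects more than one argument. A dependency-free sketch of that mapping, with plain dicts and tuples standing in for the framework_pb2 messages (illustration only, not the real implementation):

def make_op_desc(op_type, proto_inputs, **kwargs):
    # proto_inputs: list of (name, duplicable) pairs standing in for OpProto.inputs
    desc = {"type": op_type, "inputs": []}
    for name, duplicable in proto_inputs:
        arguments = kwargs.get(name, [])
        if isinstance(arguments, str):   # the real helper (is_str) also accepts unicode
            arguments = [arguments]      # promote "x" to ["x"]
        if not duplicable and len(arguments) > 1:
            raise ValueError("Input %s only accepts one input, but give %d"
                             % (name, len(arguments)))
        desc["inputs"].append({"parameter": name, "arguments": list(arguments)})
    return desc


# a duplicable slot may take a list of names, a plain slot takes a single name
print(make_op_desc("fc", [("X", True), ("W", True), ("b", False)],
                   X=["x1", "x2", "x3"], W=["w1", "w2", "w3"], b="b"))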
- """ - multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta)) - tmp_index = [] - retv = [] - if multiple: - var_format = op_desc_pb2.AttrDesc() - var_format.type = attribute_pb2.INTS - var_format.name = "%s_format" % in_out - var_format.ints.append(0) - - for var in meta: - var_name = var.name - - if var.temporary: - var_name = [core.var_names.temp()] - tmp_index.append(len(retv)) - else: - var_name = kwargs.get(var_name, []) - if not isinstance(var_name, list): - var_name = [var_name] - retv.extend(var_name) - var_format.ints.append(len(var_name) + var_format.ints[-1]) - return retv, var_format, tmp_index - else: - for var in meta: - if var.temporary: - retv.append(kwargs.get(var.name, core.var_names.temp())) - tmp_index.append(len(retv)) - else: - retv.append(kwargs.get(var.name, core.var_names.empty())) - return retv, None, tmp_index - @staticmethod def any_is_true(generator): """ @@ -146,13 +112,12 @@ class OpDescCreationMethod(object): class OpInfo(object): - def __init__(self, name, method, inputs, outputs, attrs, no_temp_outputs): + def __init__(self, name, method, inputs, outputs, attrs): self.name = name self.method = method self.inputs = inputs self.outputs = outputs self.attrs = attrs - self.no_temp_outputs = no_temp_outputs def create_op_creation_method(op_proto): @@ -170,10 +135,7 @@ def create_op_creation_method(op_proto): name=op_proto.type, inputs=[var.name for var in op_proto.inputs], outputs=[var.name for var in op_proto.outputs], - attrs=[attr.name for attr in op_proto.attrs], - no_temp_outputs=[ - var.name for var in op_proto.outputs if not var.temporary - ]) + attrs=[attr.name for attr in op_proto.attrs]) class OperatorFactory(object): @@ -214,8 +176,5 @@ class OperatorFactory(object): def get_op_attr_names(self, type): return self.get_op_info(type).attrs - def get_op_no_temp_output_names(self, type): - return self.get_op_info(type).no_temp_outputs - Operator = OperatorFactory() # Default global factory diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 10659caa882fd3d4060f9947413a392c3b681ee8..96fad9b42e04a88fdcbda093683b57451b2a3e41 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,6 +1,5 @@ py_test(test_net SRCS test_net.py) -py_test(test_fc_op SRCS test_fc_op.py) py_test(test_scope SRCS test_scope.py) py_test(test_tensor SRCS test_tensor.py) @@ -21,5 +20,8 @@ py_test(gradient_checker SRCS gradient_checker.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) + py_test(test_operator SRCS test_operator.py) +# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py) +py_test(test_recurrent_op SRCS test_recurrent_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index b73c4869d14a62a951d8e45dafb14b7523355519..501cf6110ff745b8a6022b463bc9cc3a70145c60 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -53,15 +53,18 @@ def get_numeric_gradient(op, tensor.set(input_values[var_name], core.CPUPlace()) # Create all output variable in local_scope - for output in op.outputs(): - if local_scope.find_var(output) is None: - local_scope.new_var(output).get_tensor() - + opts = op.outputs() + for key in opts: + for output in opts[key]: + 
if local_scope.find_var(output) is None: + local_scope.new_var(output).get_tensor() op.infer_shape(local_scope) # allocate output memory - for output in op.outputs(): - local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace()) + for key in opts: + for output in opts[key]: + local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( + )) # TODO(yuyang18): Only CPU is support now. cpu_ctx = core.DeviceContext.create(core.CPUPlace()) @@ -73,34 +76,60 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) + # get the input tensor that we want to get it's numeric gradient. tensor_to_check = local_scope.find_var(input_to_check).get_tensor() tensor_size = product(tensor_to_check.get_dims()) + # prepare a numpy array to store the gradient. gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32') + + # we only compute gradient of one element each time. + # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): + # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. x_pos = origin + delta tensor_to_check.set_float_element(i, x_pos) y_pos = get_output() + # plus delta to this element, run op and get the sum of the result tensor. x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() - tensor_to_check.set_float_element(i, origin) # restore old value + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. return gradient_flat.reshape(tensor_to_check.get_dims()) class GradientChecker(unittest.TestCase): - def __is_close(self, numeric_grads, scope, max_relative_error): + def assert_is_close(self, numeric_grads, scope, max_relative_error, + msg_prefix): for name in numeric_grads: - op_grad = numpy.array( - scope.find_var(grad_var_name(name)).get_tensor()) - is_close = numpy.allclose( - numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) - if not is_close: - return False - return True + b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + a = numeric_grads[name] + + abs_a = numpy.abs(a) + # if abs_a is nearly zero, then use abs error for a, not relative + # error. 
+ abs_a[abs_a < 1e-3] = 1 + + diff_mat = numpy.abs(a - b) / abs_a + max_diff = numpy.max(diff_mat) + + def err_msg(): + offset = numpy.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) def check_grad(self, forward_op, @@ -124,19 +153,24 @@ class GradientChecker(unittest.TestCase): if no_grad_set is None: no_grad_set = set() - tmp_outs = forward_op.temp_outputs() - no_tmp_out = filter(lambda name: name not in tmp_outs, - forward_op.outputs()) + no_tmp_out = forward_op.no_intermediate_outputs() if len(no_tmp_out) != 1: raise ValueError("non temp out_names should be 1") - in_names = forward_op.inputs() + inputs = forward_op.inputs() + in_names = [item for k in inputs for item in inputs[k]] + outputs = forward_op.outputs() + out_names = [item for k in outputs for item in outputs[k]] + for no_grad in no_grad_set: if no_grad not in in_names: raise ValueError("no_grad should be in in_names") backward_op = core.Operator.backward(forward_op, no_grad_set) + bwd_outputs = backward_op.outputs() + bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]] + places = [core.CPUPlace()] if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): places.append(core.GPUPlace(0)) @@ -145,7 +179,8 @@ class GradientChecker(unittest.TestCase): # get numeric gradient for check_name in inputs_to_check: numeric_grad[check_name] = \ - get_numeric_gradient(forward_op, input_vars, output_name, check_name) + get_numeric_gradient(forward_op, input_vars, output_name, + check_name) # get operator gradient according to different device for place in places: @@ -161,7 +196,7 @@ class GradientChecker(unittest.TestCase): var.set(value, place) # create output var - for out_name in forward_op.outputs(): + for out_name in out_names: scope.new_var(out_name).get_tensor() # infer the shape of output var and compute/set value of output var @@ -171,7 +206,7 @@ class GradientChecker(unittest.TestCase): # create output grad var # set shape as the output var # set value of this grad to ones - for name in forward_op.outputs(): + for name in out_names: out_tensor = scope.find_var(name).get_tensor() grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() grad_tensor.set_dims(out_tensor.shape()) @@ -179,7 +214,7 @@ class GradientChecker(unittest.TestCase): grad_tensor.set(data, place) # create input grad var - for name in backward_op.outputs(): + for name in bwd_out_names: scope.new_var(name).get_tensor() # infer the shape of input gradient var and compute/set it's value @@ -187,15 +222,8 @@ class GradientChecker(unittest.TestCase): backward_op.infer_shape(scope) backward_op.run(scope, ctx) - if isinstance(place, core.CPUPlace): - msg = "CPU kernel gradient is not close to numeric gradient" - else: - if isinstance(place, core.GPUPlace): - msg = "GPU kernel gradient is not close to numeric gradient" - else: - raise ValueError("unknown place " + type(place)) - self.assertTrue( - self.__is_close(numeric_grad, scope, max_relative_error), msg) + self.assert_is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index c0237830647371e14b755953345965a3eac7bfd2..0def484eddb88604398ee10390d3f28058714a57 100644 --- 
a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -19,14 +19,5 @@ class TestAddOp(unittest.TestCase): self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} -class TestAddGradOp(unittest.TestCase): - def test_add_grad(self): - op = Operator('add_two', X="X", Y="Y", Out="Out") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "add_two_grad") - expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py deleted file mode 100644 index e24435839d305bb1a4ab7daa3e9684a421468fd8..0000000000000000000000000000000000000000 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ /dev/null @@ -1,45 +0,0 @@ -import paddle.v2.framework.core as core -import unittest -import numpy -from paddle.v2.framework.op import Operator - - -class TestFc(unittest.TestCase): - def test_fc(self): - scope = core.Scope() - place = core.CPUPlace() - x = scope.new_var("X") - - x_tensor = x.get_tensor() - x_tensor.set_dims([1000, 784]) - x_tensor.alloc_float(place) - - w = scope.new_var("W") - w_tensor = w.get_tensor() - w_tensor.set_dims([784, 100]) - w_tensor.alloc_float(place) - - w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place) - - # Set a real numpy array here. - # x_tensor.set(numpy.array([])) - - op = Operator("fc", X="X", Y="Y", W="W") - - for out in op.outputs(): - if scope.find_var(out) is None: - scope.new_var(out).get_tensor() - - tensor = scope.find_var("Y").get_tensor() - op.infer_shape(scope) - self.assertEqual([1000, 100], tensor.shape()) - - ctx = core.DeviceContext.create(place) - - op.run(scope, ctx) - - # After complete all ops, check Y is expect or not. 
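The gradient_checker.py changes above do two things: get_numeric_gradient estimates gradients with central differences (perturb one element by plus and minus delta, rerun the op, take (y_pos - y_neg) / delta / 2), and assert_is_close compares them against the operator gradient using a relative error whose denominator is floored at 1 for near-zero entries. The same two steps on a plain numpy function, as a self-contained illustration of the check (not Paddle code; the quadratic test function and the tolerance are chosen only for this demo):

import numpy


def numeric_gradient(f, x, delta=0.005):
    # central-difference estimate of d f(x).sum() / dx, one element at a time,
    # following get_numeric_gradient above
    grad = numpy.zeros_like(x)
    flat_x, flat_g = x.reshape(-1), grad.reshape(-1)   # views into x and grad
    for i in range(flat_x.size):
        origin = flat_x[i]
        flat_x[i] = origin + delta
        y_pos = f(x).sum()
        flat_x[i] = origin - delta
        y_neg = f(x).sum()
        flat_x[i] = origin                             # restore the old value
        flat_g[i] = (y_pos - y_neg) / delta / 2
    return grad


def max_relative_diff(numeric, analytic):
    # relative error with the denominator floored for near-zero entries,
    # the same trick assert_is_close uses above
    abs_n = numpy.abs(numeric)
    abs_n[abs_n < 1e-3] = 1
    return numpy.max(numpy.abs(numeric - analytic) / abs_n)


x = numpy.random.uniform(-1, 1, (3, 4))
numeric = numeric_gradient(lambda t: t * t, x)
analytic = 2 * x                                       # exact gradient of sum(x * x)
assert max_relative_diff(numeric, analytic) < 1e-6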
- - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py new file mode 100644 index 0000000000000000000000000000000000000000..f95ed70b58d611b3233a21d3f2a34c864ae4d1b3 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -0,0 +1,36 @@ +import unittest +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy + + +class GaussianRandomTest(unittest.TestCase): + def test_cpu(self): + self.gaussian_random_test(place=core.CPUPlace()) + + def test_gpu(self): + if core.is_compile_gpu(): + self.gaussian_random_test(place=core.GPUPlace(0)) + + def gaussian_random_test(self, place): + scope = core.Scope() + scope.new_var("Out").get_tensor() + + op = Operator( + "gaussian_random", + Out="Out", + dims=[1000, 784], + mean=.0, + std=1., + seed=10) + + op.infer_shape(scope) + context = core.DeviceContext.create(place) + op.run(scope, context) + tensor = numpy.array(scope.find_var("Out").get_tensor()) + self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) + self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index b30896553dea4a4929038d524b23c6090bbed380..b42cadd11ab75abbc35763c8d12e8c27e995f0dc 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -3,6 +3,15 @@ from paddle.v2.framework.op import Operator import unittest +def fc(X, W, Y): + ret_v = core.Net.create() + + ret_v.add_op(Operator("mul", X="X", Y="W", Out="pre_activation")) + ret_v.add_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.complete_add_op(True) + return ret_v + + class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() @@ -10,18 +19,18 @@ class TestNet(unittest.TestCase): net.add_op(op1) net2 = core.Net.create() - net2.add_op(Operator("fc", X="X", W="w", Y="fc.out")) + net2.add_op(fc(X="X", W="w", Y="fc.out")) net2.complete_add_op(True) net.add_op(net2) net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). - Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). - Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). - Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). +Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. + Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. + Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}. 
''' self.assertEqual(expected, "\n" + str(net)) diff --git a/python/paddle/v2/framework/tests/test_operator.py b/python/paddle/v2/framework/tests/test_operator.py index 4f164e1a69e3fd0409f9b575a8bd9b4e423b486b..1abc4eeb57bcedc81e34b0e156048ee4f5cfdc2d 100644 --- a/python/paddle/v2/framework/tests/test_operator.py +++ b/python/paddle/v2/framework/tests/test_operator.py @@ -1,9 +1,7 @@ import unittest import paddle.v2.framework.op as op import paddle.v2.framework.core as core -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 -import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 -import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2 +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 class TestGetAllProtos(unittest.TestCase): @@ -17,7 +15,7 @@ class TestGetAllProtos(unittest.TestCase): class TestOpDescCreationMethod(unittest.TestCase): def test_plain_input_output(self): - op_proto = op_proto_pb2.OpProto() + op_proto = framework_pb2.OpProto() op_proto.type = "test" ipt = op_proto.inputs.add() ipt.name = "X" @@ -37,25 +35,32 @@ class TestOpDescCreationMethod(unittest.TestCase): method = op.OpDescCreationMethod(op_proto) output = method(X="a", Y="b", Z="c") - - expected = op_desc_pb2.OpDesc() + expected = framework_pb2.OpDesc() expected.type = "test" - expected.inputs.extend(["a", "b"]) - expected.outputs.append("c") + ipt_0 = expected.inputs.add() + ipt_0.parameter = "X" + ipt_0.arguments.extend(["a"]) + ipt_1 = expected.inputs.add() + ipt_1.parameter = 'Y' + ipt_1.arguments.extend(['b']) + opt = expected.outputs.add() + opt.parameter = "Z" + opt.arguments.extend(["c"]) + self.assertEqual(expected, output) def test_multiple_input_plain_output(self): - op_proto = op_proto_pb2.OpProto() + op_proto = framework_pb2.OpProto() op_proto.type = "fc" ipt = op_proto.inputs.add() ipt.name = "X" ipt.comment = "" - ipt.multiple = True + ipt.duplicable = True ipt = op_proto.inputs.add() ipt.name = "W" ipt.comment = "" - ipt.multiple = True + ipt.duplicable = True ipt = op_proto.inputs.add() ipt.name = "b" @@ -70,30 +75,50 @@ class TestOpDescCreationMethod(unittest.TestCase): method = op.OpDescCreationMethod(op_proto) generated1 = method(X="x", W="w", b="b", Y="y") - expected1 = op_desc_pb2.OpDesc() - expected1.inputs.extend(['x', 'w', 'b']) - expected1.outputs.extend(['y']) + expected1 = framework_pb2.OpDesc() + tmp = expected1.inputs.add() + tmp.parameter = "X" + tmp.arguments.extend(['x']) + + tmp = expected1.inputs.add() + tmp.parameter = 'W' + tmp.arguments.extend(['w']) + + tmp = expected1.inputs.add() + tmp.parameter = 'b' + tmp.arguments.extend(['b']) + + tmp = expected1.outputs.add() + tmp.parameter = 'Y' + tmp.arguments.extend(['y']) expected1.type = 'fc' - attr = expected1.attrs.add() - attr.name = 'input_format' - attr.type = attribute_pb2.INTS - attr.ints.extend([0, 1, 2, 3]) self.assertEqual(expected1, generated1) generated2 = method( X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y') - expected2 = op_desc_pb2.OpDesc() - expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b']) - expected2.outputs.extend(['y']) + expected2 = framework_pb2.OpDesc() + + tmp = expected2.inputs.add() + tmp.parameter = "X" + tmp.arguments.extend(['x1', 'x2', 'x3']) + + tmp = expected2.inputs.add() + tmp.parameter = 'W' + tmp.arguments.extend(['w1', 'w2', 'w3']) + + tmp = expected2.inputs.add() + tmp.parameter = 'b' + tmp.arguments.extend(['b']) + + tmp = expected2.outputs.add() + tmp.parameter = 'Y' + tmp.arguments.extend(['y']) + expected2.type = 'fc' - attr 
= expected2.attrs.add() - attr.name = 'input_format' - attr.type = attribute_pb2.INTS - attr.ints.extend([0, 3, 6, 7]) self.assertEqual(expected2, generated2) def test_attrs(self): - op_proto = op_proto_pb2.OpProto() + op_proto = framework_pb2.OpProto() op_proto.type = "test" ipt = op_proto.inputs.add() ipt.name = 'X' @@ -105,12 +130,12 @@ class TestOpDescCreationMethod(unittest.TestCase): attr.comment = "" attr.type = type - __add_attr__("int_attr", attribute_pb2.INT) - __add_attr__("float_attr", attribute_pb2.FLOAT) - __add_attr__("string_attr", attribute_pb2.STRING) - __add_attr__("ints_attr", attribute_pb2.INTS) - __add_attr__("floats_attr", attribute_pb2.FLOATS) - __add_attr__("strings_attr", attribute_pb2.STRINGS) + __add_attr__("int_attr", framework_pb2.INT) + __add_attr__("float_attr", framework_pb2.FLOAT) + __add_attr__("string_attr", framework_pb2.STRING) + __add_attr__("ints_attr", framework_pb2.INTS) + __add_attr__("floats_attr", framework_pb2.FLOATS) + __add_attr__("strings_attr", framework_pb2.STRINGS) op_proto.comment = "" self.assertTrue(op_proto.IsInitialized()) @@ -126,76 +151,52 @@ class TestOpDescCreationMethod(unittest.TestCase): floats_attr=[0.2, 3.2, 4.5], strings_attr=["a", "b", "c"]) - expected = op_desc_pb2.OpDesc() + expected = framework_pb2.OpDesc() expected.type = "test" - expected.inputs.extend(['a']) + + ipt = expected.inputs.add() + ipt.parameter = "X" + ipt.arguments.extend(['a']) + attr = expected.attrs.add() attr.name = "int_attr" - attr.type = attribute_pb2.INT + attr.type = framework_pb2.INT attr.i = 10 attr = expected.attrs.add() attr.name = "float_attr" - attr.type = attribute_pb2.FLOAT + attr.type = framework_pb2.FLOAT attr.f = 3.2 attr = expected.attrs.add() attr.name = "string_attr" - attr.type = attribute_pb2.STRING + attr.type = framework_pb2.STRING attr.s = "test_str" attr = expected.attrs.add() attr.name = "ints_attr" - attr.type = attribute_pb2.INTS + attr.type = framework_pb2.INTS attr.ints.extend([0, 1, 2, 3, 4]) attr = expected.attrs.add() attr.name = 'floats_attr' - attr.type = attribute_pb2.FLOATS + attr.type = framework_pb2.FLOATS attr.floats.extend([0.2, 3.2, 4.5]) attr = expected.attrs.add() attr.name = 'strings_attr' - attr.type = attribute_pb2.STRINGS + attr.type = framework_pb2.STRINGS attr.strings.extend(['a', 'b', 'c']) self.assertEqual(expected, generated) - def test_input_temporary_output(self): - op_proto = op_proto_pb2.OpProto() - op_proto.type = "test" - out = op_proto.outputs.add() - out.name = "OUT" - out.comment = "" - - out = op_proto.outputs.add() - out.name = "TMP" - out.comment = "" - out.temporary = True - - out = op_proto.outputs.add() - out.name = "OUT2" - out.comment = "" - op_proto.comment = "" - - method = op.OpDescCreationMethod(op_proto) - generated = method(OUT="a", OUT2="b") - desc = op_desc_pb2.OpDesc() - desc.outputs.extend(["a", core.var_names.temp(), "b"]) - desc.type = "test" - attr = desc.attrs.add() - attr.name = "temporary_index" - attr.type = attribute_pb2.INTS - attr.ints.append(2) - self.assertEqual(generated, desc) - class TestOpCreations(unittest.TestCase): def test_all(self): add_op = op.Operator("add_two", X="a", Y="b", Out="z") self.assertIsNotNone(add_op) # Invoke C++ DebugString() - self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).', + self.assertEqual('Op(add_two), inputs:{X[a], Y[b]}, outputs:{Out[z]}.', str(add_op)) diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/framework/tests/test_protobuf.py index 
69e98e2f250a9df23b25e7e2043af29f87c996a0..848a396b3b6eec57d500b464780b64f339b09e94 100644 --- a/python/paddle/v2/framework/tests/test_protobuf.py +++ b/python/paddle/v2/framework/tests/test_protobuf.py @@ -1,11 +1,10 @@ -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_lib -import paddle.v2.framework.proto.attribute_pb2 as attr_type_lib +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import unittest class TestFrameworkProto(unittest.TestCase): def test_all(self): - op_proto = op_proto_lib.OpProto() + op_proto = framework_pb2.OpProto() ipt0 = op_proto.inputs.add() ipt0.name = "a" ipt0.comment = "the input of cosine op" @@ -19,7 +18,7 @@ class TestFrameworkProto(unittest.TestCase): attr = op_proto.attrs.add() attr.name = "scale" attr.comment = "scale of cosine op" - attr.type = attr_type_lib.FLOAT + attr.type = framework_pb2.FLOAT op_proto.type = "cos" self.assertTrue(op_proto.IsInitialized()) diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 5c77c477b347f4713e4af2a8cb462b243d7a779c..0db66cc4e181fde10f161a323ea749fd84a5f963 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -2,19 +2,74 @@ import logging import paddle.v2.framework.core as core import unittest import numpy as np -import paddle.v2.framework.create_op_creation_methods as creation +from paddle.v2.framework.op import Operator -ops = creation.op_creations +def py_sigmoid(x): + return 1. / (1. + np.exp(-x)) -def create_tensor(scope, name, shape): + +class PySimpleRNN(object): + ''' + A simple implementation of RNN based on numpy, to futhur test RecurrentOp's alogorithm + ''' + + def __init__(self, input_dim=30, batch_size=50, weight_dim=15, sent_len=11): + self.x = np.random.normal(size=(sent_len, batch_size, input_dim)) + self.W = np.random.normal(size=(input_dim, input_dim)) + self.U = np.random.normal(size=(input_dim, input_dim)) + self.h_boot = np.random.normal(size=(batch_size, input_dim)) + + # memories + self.mems = [ + np.zeros(shape=(batch_size, input_dim)) for i in range(sent_len) + ] + + def forward(self): + xs = self.segment_inputs() + for step_id in range(self.x.shape[0]): + self.step(step_id, xs[step_id]) + return self.concat_outputs() + + def segment_inputs(self): + return [self.x[i] for i in range(self.x.shape[0])] + + def concat_outputs(self): + return np.array(self.mems) + + def step(self, step_id, x): + ''' + run a step + ''' + mem = self.mems[step_id] + if step_id > 0: + pre_mem = self.mems[step_id - 1] + else: + pre_mem = self.h_boot + xW = np.matmul(x, self.W) + hU = np.matmul(mem, self.U) + + sum = xW + hU + self.mems[step_id] = py_sigmoid(sum) + + +class PySimpleRNNTest(unittest.TestCase): + def setUp(self): + self.rnn = PySimpleRNN() + + def test_forward(self): + output = self.rnn.forward() + print 'output', output + + +def create_tensor(scope, name, shape, np_data): tensor = scope.new_var(name).get_tensor() tensor.set_dims(shape) - tensor.set(np.random.random(shape), core.CPUPlace()) + tensor.set(np_data, core.CPUPlace()) return tensor -class TestRNN(unittest.TestCase): +class TestRecurrentOp(unittest.TestCase): ''' Test RNNOp @@ -28,7 +83,7 @@ class TestRNN(unittest.TestCase): memories: - h outputs: - - h + - h ''' input_dim = 30 @@ -36,33 +91,45 @@ class TestRNN(unittest.TestCase): weight_dim = 15 sent_len = 11 - def init(self): + def setUp(self): + self.py_rnn = PySimpleRNN(self.input_dim, self.batch_size, + 
self.weight_dim, self.sent_len) + def forward(self): self.scope = core.Scope() - self.create_global_variables() self.create_step_net() rnn_op = self.create_rnn_op() ctx = core.DeviceContext.create(core.CPUPlace()) - print 'infer_shape' rnn_op.infer_shape(self.scope) - rnn_op.run(self.scope, ctx) + return np.array(self.scope.find_var("h").get_tensor()) def create_global_variables(self): # create inlink + x_np_data = self.py_rnn.x create_tensor(self.scope, "x", - [self.sent_len, self.batch_size, self.input_dim]) - create_tensor(self.scope, "W", [self.input_dim, self.input_dim]) - create_tensor(self.scope, "U", [self.input_dim, self.input_dim]) - create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim]) + [self.sent_len, self.batch_size, self.input_dim], + x_np_data) + W_np_data = self.py_rnn.W + create_tensor(self.scope, "W", [self.input_dim, self.input_dim], + W_np_data) + + U_np_data = self.py_rnn.U + create_tensor(self.scope, "U", [self.input_dim, self.input_dim], + U_np_data) + + h_boot_np_data = self.py_rnn.h_boot + create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim], + h_boot_np_data) self.scope.new_var("step_scopes") self.scope.new_var("h@alias") self.scope.new_var("h") def create_rnn_op(self): # create RNNOp - rnnop = ops.recurrent_op( + rnnop = Operator( + "recurrent_op", # inputs inlinks=["x"], boot_memories=["h_boot"], @@ -81,17 +148,25 @@ class TestRNN(unittest.TestCase): var = self.scope.new_var("stepnet") stepnet = var.get_net() - x_fc_op = ops.fc(X="x@alias", W="W", Y="Wx") - h_fc_op = ops.fc(X="h@pre", W="U", Y="Uh") - sum_op = ops.add_two(X="Wx", Y="Uh", Out="sum") - sig_op = ops.sigmoid(X="sum", Y="h@alias") + # x_fc_op = Operator("fc", X="x@alias", W="W", Y="Wx") + # h_fc_op = Operator("fc", X="h@pre", W="U", Y="Uh") + x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") + h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") + sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum") + sig_op = Operator("sigmoid", X="sum", Y="h@alias") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: stepnet.add_op(op) stepnet.complete_add_op(True) - def test_recurrent(self): - self.init() + def test_forward(self): + print 'test recurrent op forward' + pd_output = self.forward() + py_output = self.py_rnn.forward() + print 'pd_output', pd_output + print + print 'py_output', py_output + self.assertEqual(pd_output.shape, py_output.shape) if __name__ == '__main__': diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index ba581980334fec6226a537af2cf53b3465d32c1e..29f0945eb4c88eab8fa9ee83f455190dfd473aa4 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,13 +1,26 @@ -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Optimizers(update equation) for SGD method. 
-TODO(zhihong) : create new optimizer with proto config, add new optimizer here - TODO(yuyang18): Complete comments. """ +import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils +import paddle.trainer_config_helpers.optimizers as v1_optimizers +from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig + __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'RMSProp', 'ModelAverage', 'L2Regularization' @@ -70,7 +83,8 @@ class Optimizer(object): gradient_machine.prefetch(in_args) parameter_updater.getParametersRemote() - :param pserver_spec: pserver location, eg: localhost:3000 + :param pserver_spec: pserver location, eg: localhost:3000, if use etcd, + pserver_spec should be the etcd endpoints, eg: http://localhost:2379 :return: parameter_updater """ if is_local: diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index a9cba8ca0b1efd4149463f6c7bf2dcdfbea350c9..b8af5abaeada49a3e8951c21c9065aaf4d1ab851 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np from paddle.proto.ParameterConfig_pb2 import ParameterConfig import paddle.trainer.config_parser as cp @@ -113,16 +127,7 @@ class Parameters(object): """ return iter(self.__param_conf__) - def __getitem__(self, key): - """ - Get parameter by parameter name. It uses Python dict syntax. - - :note: It will always copy the parameter from C++ side. - :param key: Parameter name - :type key: basestring - :return: parameter value - :rtype: np.ndarray - """ + def __getter_inner(self, key, param_type): import py_paddle.swig_paddle as api shape = self.get_shape(key) @@ -138,7 +143,7 @@ class Parameters(object): each_gradient_machine, key) # for simplify implementation now, we always copy from C++ assert isinstance(param, api.Parameter) - val = param.getBuf(api.PARAMETER_VALUE) + val = param.getBuf(param_type) assert isinstance(val, api.Vector) val = val.copyToNumpyArray() return val @@ -146,6 +151,19 @@ class Parameters(object): raise RuntimeError("Unexpected branch") + def __getitem__(self, key): + """ + Get parameter by parameter name. It uses Python dict syntax. + + :note: It will always copy the parameter from C++ side. + :param key: Parameter name + :type key: basestring + :return: parameter value + :rtype: np.ndarray + """ + import py_paddle.swig_paddle as api + return self.__getter_inner(key, api.PARAMETER_VALUE) + def get_shape(self, key): """ get shape of the parameter. @@ -202,6 +220,19 @@ class Parameters(object): """ return self.__getitem__(key=parameter_name) + def get_grad(self, key): + """ + Get grandient by parameter name. + + :note: It will always copy the parameter from C++ side. + :param key: parameter name + :type key: basestring + :return: The grandient matrix. 
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index 76bae0bb12b6c33f88530386f9cc19ae9b59f457..9c4dd5f25083d210bbd218a85d8dbb3cce2c3d0e 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -161,14 +161,14 @@ class SGD(object):
                     self.__parameter_updater__.update(each_param)
                 cost_sum = out_args.sum()
                 cost = cost_sum / len(data_batch)
-                self.__parameter_updater__.finishBatch(cost)
-                batch_evaluator.finish()
                 event_handler(
                     v2_event.EndIteration(
                         pass_id=pass_id,
                         batch_id=batch_id,
                         cost=cost,
                         evaluator=batch_evaluator))
+                self.__parameter_updater__.finishBatch(cost)
+                batch_evaluator.finish()
 
             self.__parameter_updater__.finishPass()
             pass_evaluator.finish()
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3df822bd76d2a64a0a35f84b4ec309ce7150c221
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,9 @@
+requests==2.9.2
+numpy>=1.12
+protobuf==3.1
+recordio
+matplotlib
+rarfile
+scipy>=0.19.0
+Pillow
+nltk>=3.2.2
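In trainer.py above, EndIteration is now dispatched before finishBatch() and batch_evaluator.finish(); together with the new Parameters.get_grad(), this suggests an event handler can inspect gradients while the batch is still open. A hypothetical handler sketch under that assumption (the parameter name and the surrounding `parameters` object are illustrative, not part of this patch):

    import paddle.v2 as paddle

    def event_handler(event):
        # Hypothetical: peek at a gradient at the end of each iteration,
        # before the updater's finishBatch() runs.
        if isinstance(event, paddle.event.EndIteration) and event.batch_id % 100 == 0:
            # `parameters` is the paddle.parameters.Parameters object the
            # caller built and passed to the trainer.
            grad = parameters.get_grad("__fc_layer_0__.w0")
            print "pass %d, batch %d, cost %f, grad abs-sum %f" % (
                event.pass_id, event.batch_id, event.cost, abs(grad).sum())
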
diff --git a/python/setup.py.in b/python/setup.py.in
index 38f0a503bee3eb29ae3c893c96d6e333be54b96e..38728aa2fd77cf3c882479ed83e99688b9ffa541 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -1,5 +1,4 @@
 from setuptools import setup, Distribution
-
 class BinaryDistribution(Distribution):
     def has_ext_modules(foo):
         return True
@@ -18,15 +17,8 @@ packages=['paddle',
           'paddle.v2.framework.proto',
           'py_paddle']
 
-setup_requires=["requests",
-                "numpy>=1.12",
-                "protobuf==3.1",
-                "recordio",
-                "matplotlib",
-                "rarfile",
-                "scipy>=0.19.0",
-                "Pillow",
-                "nltk>=3.2.2"]
+with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
+    setup_requires = f.read().splitlines()
 
 if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
     setup_requires+=["opencv-python"]
@@ -45,14 +37,14 @@ setup(name='paddlepaddle',
         '': '${CMAKE_CURRENT_SOURCE_DIR}',
         # The paddle.v2.framework.proto will be generated while compiling.
         # So that package points to other directory.
-        'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework',
-        'py_paddle': '${PROJ_ROOT}/paddle/py_paddle'
+        'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
+        'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
     },
-    scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'],
+    scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'],
     distclass=BinaryDistribution,
     data_files=[('/usr/local/opt/paddle/bin',
-                 ['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage',
-                  '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer',
-                  '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model',
-                  '${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])]
+                 ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage',
+                  '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
+                  '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
+                  '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])]
 )