提交 bfde09b8 编写于 作者: Z zchen0211

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

...@@ -24,4 +24,5 @@ cmake-build-* ...@@ -24,4 +24,5 @@ cmake-build-*
python/paddle/v2/framework/core.so python/paddle/v2/framework/core.so
CMakeFiles CMakeFiles
cmake_install.cmake cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system) include(system)
...@@ -36,8 +36,8 @@ include(simd) ...@@ -36,8 +36,8 @@ include(simd)
################################ Configurations ####################################### ################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF)
option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
...@@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION ...@@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage include(coveralls) # set code coverage
include_directories("${PROJ_ROOT}") include_directories("${PADDLE_SOURCE_DIR}")
include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
include_directories(${Boost_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS})
...@@ -144,7 +144,7 @@ if(WITH_GPU) ...@@ -144,7 +144,7 @@ if(WITH_GPU)
endif(WITH_GPU) endif(WITH_GPU)
if(WITH_MKLDNN) if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
endif() endif()
if(USE_NNPACK) if(USE_NNPACK)
...@@ -164,10 +164,12 @@ if(WITH_GOLANG) ...@@ -164,10 +164,12 @@ if(WITH_GOLANG)
add_subdirectory(go) add_subdirectory(go)
endif(WITH_GOLANG) endif(WITH_GOLANG)
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
add_subdirectory(paddle) add_subdirectory(paddle)
if(WITH_PYTHON) if(WITH_PYTHON)
add_subdirectory(python) add_subdirectory(python)
endif() endif()
if(WITH_DOC) if(WITH_DOC)
add_subdirectory(doc) add_subdirectory(doc)
endif() endif()
...@@ -28,7 +28,7 @@ RUN apt-get update && \ ...@@ -28,7 +28,7 @@ RUN apt-get update && \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \ python-matplotlib gcc-4.8 g++-4.8 \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \ liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \ net-tools && \
...@@ -64,13 +64,28 @@ RUN pip install --upgrade pip && \ ...@@ -64,13 +64,28 @@ RUN pip install --upgrade pip && \
pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install rarfile pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2'
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN apt-get install -y libssl-dev libffi-dev RUN apt-get install -y libssl-dev libffi-dev
RUN pip install certifi urllib3[secure] RUN pip install certifi urllib3[secure]
# TODO(qijun) The template library Eigen doesn't work well with GCC 5
# coming with the default Docker image, so we switch to use GCC 4.8
# by default. And I will check Eigen library later.
RUN ln -sf gcc-4.8 /usr/bin/gcc && \
ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \
ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \
ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \
ln -sf g++-4.8 /usr/bin/g++ && \
ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++
# Install woboq_codebrowser to /woboq # Install woboq_codebrowser to /woboq
RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
(cd /woboq \ (cd /woboq \
......
...@@ -129,7 +129,7 @@ if(WITH_GOLANG) ...@@ -129,7 +129,7 @@ if(WITH_GOLANG)
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide
COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND env GOPATH=${GOPATH} ${GLIDE} install
COMMAND touch ${CMAKE_BINARY_DIR}/glide COMMAND touch ${CMAKE_BINARY_DIR}/glide
DEPENDS ${PROJ_ROOT}/go/glide.lock DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go"
) )
......
...@@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME) ...@@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME)
if(SOURCES_LIST) if(SOURCES_LIST)
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}" "--filter=${STYLE_FILTER}"
${SOURCES_LIST} ${SOURCES_LIST}
COMMENT "cpplint: Checking source code style" COMMENT "cpplint: Checking source code style"
......
...@@ -9,11 +9,13 @@ function(CheckCompilerCXX11Flag) ...@@ -9,11 +9,13 @@ function(CheckCompilerCXX11Flag)
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
endif() endif()
if(NOT ANDROID)
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now. # Use Debug mode instead for now.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
endif() endif()
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
# Apple Clang is a different compiler than upstream Clang which havs different version numbers. # Apple Clang is a different compiler than upstream Clang which havs different version numbers.
......
...@@ -411,7 +411,7 @@ function(py_test TARGET_NAME) ...@@ -411,7 +411,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python
python2 ${py_test_SRCS} python2 ${py_test_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif() endif()
......
...@@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "") ...@@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl")
set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_SECTION Devel)
set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION})
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst")
#set(CPACK_GENERATOR "DEB") #set(CPACK_GENERATOR "DEB")
# Start cpack # Start cpack
include (CMakePackageConfigHelpers) include (CMakePackageConfigHelpers)
......
...@@ -141,8 +141,8 @@ endmacro() ...@@ -141,8 +141,8 @@ endmacro()
function(create_resources res_file output_file) function(create_resources res_file output_file)
add_custom_command( add_custom_command(
OUTPUT ${output_file} OUTPUT ${output_file}
COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file} COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file}
DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py) DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py)
endfunction() endfunction()
......
...@@ -4,7 +4,7 @@ set(tmp_version "HEAD") ...@@ -4,7 +4,7 @@ set(tmp_version "HEAD")
while ("${PADDLE_VERSION}" STREQUAL "") while ("${PADDLE_VERSION}" STREQUAL "")
execute_process( execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version}
WORKING_DIRECTORY ${PROJ_ROOT} WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_TAG_NAME OUTPUT_VARIABLE GIT_TAG_NAME
RESULT_VARIABLE GIT_RESULT RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# serve to show the default. # serve to show the default.
import sys import sys
import os, subprocess import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
import paddle import paddle
...@@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify ...@@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
...@@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' ...@@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css". # so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['@PROJ_ROOT@/doc_theme/static'] html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static']
# Output file base name for HTML help builder. # Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc' htmlhelp_basename = project + 'doc'
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# serve to show the default. # serve to show the default.
import sys import sys
import os, subprocess import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
import paddle import paddle
...@@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify ...@@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
...@@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' ...@@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css". # so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['@PROJ_ROOT@/doc_theme/static'] html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static']
# Output file base name for HTML help builder. # Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc' htmlhelp_basename = project + 'doc'
......
hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582
updated: 2017-08-03T21:46:51.744995189Z updated: 2017-08-07T23:37:48.867469328Z
imports: imports:
- name: github.com/beorn7/perks - name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
...@@ -10,7 +10,7 @@ imports: ...@@ -10,7 +10,7 @@ imports:
- name: github.com/cockroachdb/cmux - name: github.com/cockroachdb/cmux
version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92
- name: github.com/coreos/etcd - name: github.com/coreos/etcd
version: c31bec0f29facff13f7c3e3d948e55dd6689ed42 version: d0d1a87aa96ae14914751d42264262cb69eda170
subpackages: subpackages:
- alarm - alarm
- auth - auth
...@@ -24,6 +24,7 @@ imports: ...@@ -24,6 +24,7 @@ imports:
- error - error
- etcdserver - etcdserver
- etcdserver/api - etcdserver/api
- etcdserver/api/etcdhttp
- etcdserver/api/v2http - etcdserver/api/v2http
- etcdserver/api/v2http/httptypes - etcdserver/api/v2http/httptypes
- etcdserver/api/v3client - etcdserver/api/v3client
...@@ -210,11 +211,6 @@ testImports: ...@@ -210,11 +211,6 @@ testImports:
version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9 version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9
subpackages: subpackages:
- spew - spew
- name: github.com/docker/docker
version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e
subpackages:
- pkg/ioutils
- pkg/longpath
- name: github.com/pmezard/go-difflib - name: github.com/pmezard/go-difflib
version: d8ed2627bdf02c080bf22230dbb337003b7aba2d version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
subpackages: subpackages:
......
package master_test package master_test
import ( import (
"io/ioutil"
"net/url"
"os" "os"
"strings"
"testing" "testing"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/master"
"github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/embed" "github.com/coreos/etcd/embed"
"github.com/docker/docker/pkg/ioutils"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestNewServiceWithEtcd(t *testing.T) { func TestNewServiceWithEtcd(t *testing.T) {
// setup an embed etcd server // setup an embed etcd server
etcdDir, err := ioutils.TempDir("", "") etcdDir, err := ioutil.TempDir("", "")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
cfg := embed.NewConfig() cfg := embed.NewConfig()
lpurl, _ := url.Parse("http://localhost:0")
lcurl, _ := url.Parse("http://localhost:0")
cfg.LPUrls = []url.URL{*lpurl}
cfg.LCUrls = []url.URL{*lcurl}
cfg.Dir = etcdDir cfg.Dir = etcdDir
e, err := embed.StartEtcd(cfg) e, err := embed.StartEtcd(cfg)
if err != nil { if err != nil {
...@@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) { ...@@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
}() }()
select {
case <-e.Server.ReadyNotify():
t.Log("Server is ready!")
case <-time.After(60 * time.Second):
e.Server.Stop() // trigger a shutdown
t.Fatal("Server took too long to start!")
}
ep := []string{"127.0.0.1:2379"} <-e.Server.ReadyNotify()
port := strings.Split(e.Clients[0].Addr().String(), ":")[1]
endpoint := "127.0.0.1:" + port
ep := []string{endpoint}
masterAddr := "127.0.0.1:3306" masterAddr := "127.0.0.1:3306"
store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30) store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30)
if err != nil { if err != nil {
......
...@@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { ...@@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte {
type selector bool type selector bool
func (s selector) Select() bool { func (s selector) Select() (bool, error) {
return bool(s) return bool(s), nil
}
func (s selector) Done() error {
return nil
} }
type lister []client.Server type lister []client.Server
...@@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli ...@@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli
} }
//export paddle_new_etcd_pserver_client //export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { func paddle_new_etcd_pserver_client(etcdEndpoints *C.char) C.paddle_pserver_client {
// TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters)
addr := C.GoString(etcdEndpoints) addr := C.GoString(etcdEndpoints)
etcdClient := client.NewEtcd(addr) etcdClient := client.NewEtcd(addr)
c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) c := client.NewClient(etcdClient, etcdClient.Desired(), etcdClient)
return add(c) return add(c)
} }
...@@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) { ...@@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) {
//export paddle_begin_init_params //export paddle_begin_init_params
func paddle_begin_init_params(client C.paddle_pserver_client) C.int { func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
c := get(client) c := get(client)
if selected := c.BeginInitParams(); selected { selected, err := c.BeginInitParams()
if err != nil {
panic(err)
}
if selected {
return 1 return 1
} }
return 0 return 0
......
...@@ -17,12 +17,10 @@ def main(): ...@@ -17,12 +17,10 @@ def main():
# network config # network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x, y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param( param_attr=paddle.attr.Param(name='w'),
name='w', learning_rate=1e-3),
size=1, size=1,
act=paddle.activation.Linear(), act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param( bias_attr=paddle.attr.Param(name='b'))
name='b', learning_rate=1e-3))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y) cost = paddle.layer.mse_cost(input=y_predict, label=y)
......
...@@ -27,9 +27,13 @@ import ( ...@@ -27,9 +27,13 @@ import (
// TODO(helin): add RPC call retry logic // TODO(helin): add RPC call retry logic
// Selector selects if the client should initialize parameter servers. // Selector selects if the client should initialize parameters and
// reports the initialization process done.
type Selector interface { type Selector interface {
Select() bool // Select selects if the client should initialize parameter servers.
Select() (bool, error)
// Done indicates the initialization process is done.
Done() error
} }
// Server is the identification of a parameter Server. // Server is the identification of a parameter Server.
...@@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { ...@@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
// servers. Other trainers will be blocked until the initialization is // servers. Other trainers will be blocked until the initialization is
// done, and they need to get the initialized parameters from // done, and they need to get the initialized parameters from
// parameter servers using GetParams. // parameter servers using GetParams.
func (c *Client) BeginInitParams() bool { func (c *Client) BeginInitParams() (bool, error) {
return c.sel.Select() return c.sel.Select()
} }
......
...@@ -124,8 +124,12 @@ func initEtcdClient() { ...@@ -124,8 +124,12 @@ func initEtcdClient() {
type selector bool type selector bool
func (s selector) Select() bool { func (s selector) Select() (bool, error) {
return bool(s) return bool(s), nil
}
func (s selector) Done() error {
return nil
} }
type lister []client.Server type lister []client.Server
...@@ -135,7 +139,11 @@ func (l lister) List() []client.Server { ...@@ -135,7 +139,11 @@ func (l lister) List() []client.Server {
} }
func testClient(t *testing.T, c *client.Client) { func testClient(t *testing.T, c *client.Client) {
selected := c.BeginInitParams() selected, err := c.BeginInitParams()
if err != nil {
t.Fatal(err)
}
if !selected { if !selected {
t.Fatal("should be selected.") t.Fatal("should be selected.")
} }
......
...@@ -16,53 +16,60 @@ package client ...@@ -16,53 +16,60 @@ package client
import ( import (
"context" "context"
"errors"
"fmt"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
"github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
const ( const (
defaultEtcdTimeout time.Duration = 5 * time.Second defaultEtcdTimeout time.Duration = 5 * time.Second
initLockPath = "/init_ps/lock"
initDonePath = "/init_ps/done"
initDoneVal = "1"
) )
// EtcdClient is used by pserver client that is a part of trainer process. // Etcd is used by pserver client that is a part of trainer process.
// TODO: // TODO:
// 1. add watcher to watch the change state of pservers) // 1. add watcher to watch the change state of pservers.
// 1. add etcd lock) type Etcd struct {
type EtcdClient struct {
client *clientv3.Client client *clientv3.Client
timeout time.Duration timeout time.Duration
endpoints []string endpoints []string
lock *concurrency.Mutex
} }
// Desired read ps desired number from etcd. // Desired read ps desired number from etcd.
func (p *EtcdClient) Desired() int { func (e *Etcd) Desired() int {
var psDesired int var psDesired int
for { for {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout) ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
resp, err := p.client.Get(ctx, pserver.PsDesired) resp, err := e.client.Get(ctx, pserver.PsDesired)
cancel() cancel()
if err != nil { if err != nil {
log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
log.Infoln("Waiting for ps desired registered ...") log.Infoln("Waiting for ps desired registered ...")
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil { if err != nil {
log.Errorf("psDesired %d invalid %v", psDesired, err) log.Errorf("psDesired %d invalid %v", psDesired, err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
...@@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int { ...@@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int {
} }
// List return the pserver list read from etcd. // List return the pserver list read from etcd.
func (p *EtcdClient) List() []Server { func (e *Etcd) List() []Server {
psDesired := p.Desired() psDesired := e.Desired()
servers := make([]Server, psDesired) servers := make([]Server, psDesired)
for { for {
for i := 0; i < psDesired; i++ { for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout) ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
psKey := pserver.PsPath + strconv.Itoa(i) psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey) log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey) resp, err := e.client.Get(ctx, psKey)
cancel() cancel()
if err != nil { if err != nil {
log.Infof("Get psKey= %s error, %v", psKey, err) log.Infof("Get psKey= %s error, %v", psKey, err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
log.Infof("Waiting for ps addr registered ...") log.Infof("Waiting for ps addr registered ...")
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
...@@ -100,7 +107,7 @@ func (p *EtcdClient) List() []Server { ...@@ -100,7 +107,7 @@ func (p *EtcdClient) List() []Server {
// TODO(Longfei) check the ps address // TODO(Longfei) check the ps address
if psAddr == "" { if psAddr == "" {
log.Infof("Get psKey = %s, psAddr is empty", psKey) log.Infof("Get psKey = %s, psAddr is empty", psKey)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
log.Debugf("got value (%s) for key: %s", psAddr, psKey) log.Debugf("got value (%s) for key: %s", psAddr, psKey)
...@@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server { ...@@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server {
} }
// NewEtcd create a etcd client to return the state of pserver on etcd. // NewEtcd create a etcd client to return the state of pserver on etcd.
func NewEtcd(endpoints string) *EtcdClient { func NewEtcd(endpoints string) *Etcd {
ep := strings.Split(endpoints, ",") ep := strings.Split(endpoints, ",")
var cli *clientv3.Client var cli *clientv3.Client
var err error var err error
...@@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient { ...@@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient {
break break
} }
log.Infof("Connected to etcd: %s\n", endpoints) log.Infof("Connected to etcd: %s\n", endpoints)
client := &EtcdClient{ client := &Etcd{
client: cli, client: cli,
timeout: defaultEtcdTimeout, timeout: defaultEtcdTimeout,
endpoints: ep, endpoints: ep,
} }
return client return client
} }
// Select indicates if the current trainer is selected to initialize
// the pserver parameters.
func (e *Etcd) Select() (bool, error) {
sess, err := concurrency.NewSession(e.client, concurrency.WithTTL(5))
if err != nil {
return false, err
}
lock := concurrency.NewMutex(sess, initLockPath)
log.Infof("Trying to acquire lock at %s.", initLockPath)
// Do not use timeout context here, since we don't know how
// long does it take for other trainers to initialize the
// parameters.
err = lock.Lock(context.Background())
if err != nil {
return false, err
}
log.Infof("Successfully acquired lock at %s.", initLockPath)
get := clientv3.OpGet(initDonePath)
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
tresp, err := e.client.Txn(ctx).If(lock.IsOwner()).Then(get).Commit()
cancel()
if err != nil {
return false, err
}
if !tresp.Succeeded {
return false, errors.New("no longer the owner of the lock")
}
resp := tresp.Responses[0].GetResponseRange()
if len(resp.Kvs) == 0 {
// Key value not set, select current trainer.
e.lock = lock
log.Infoln("Trainer selected.")
return true, nil
}
if string(resp.Kvs[0].Value) == initDoneVal {
log.Infoln("Initialization is already done.")
ctx, cancel = context.WithTimeout(context.Background(), e.timeout)
err = lock.Unlock(ctx)
cancel()
if err != nil {
log.Errorln(err)
}
return false, nil
}
return false, fmt.Errorf("key %s have unexpected value: %v", initDonePath, resp.Kvs[0].Value)
}
// Done indicates the parameter initialization process is done.
func (e *Etcd) Done() error {
if e.lock == nil {
return errors.New("lock is nil, Done called unexpectedly")
}
put := clientv3.OpPut(initDonePath, initDoneVal)
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
tresp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit()
cancel()
if err != nil {
return err
}
if !tresp.Succeeded {
return errors.New("no longer the owner of the lock")
}
ctx, cancel = context.WithTimeout(context.Background(), e.timeout)
err = e.lock.Unlock(ctx)
cancel()
if err != nil {
log.Errorln(err)
} else {
e.lock = nil
}
return nil
}
// Close closes the etcd client.
func (e *Etcd) Close() error {
var err error
if e.lock != nil {
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
err = e.lock.Unlock(ctx)
cancel()
if err == nil {
e.lock = nil
}
}
cErr := e.client.Close()
if cErr != nil {
if err != nil {
log.Errorln(cErr)
return err
}
return cErr
}
return err
}
package client_test
import (
"io/ioutil"
"net/url"
"os"
"strings"
"sync"
"testing"
"github.com/PaddlePaddle/Paddle/go/pserver/client"
"github.com/coreos/etcd/embed"
)
func TestSelector(t *testing.T) {
etcdDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatal(err)
}
cfg := embed.NewConfig()
lpurl, _ := url.Parse("http://localhost:0")
lcurl, _ := url.Parse("http://localhost:0")
cfg.LPUrls = []url.URL{*lpurl}
cfg.LCUrls = []url.URL{*lcurl}
cfg.Dir = etcdDir
e, err := embed.StartEtcd(cfg)
if err != nil {
t.Fatal(err)
}
defer func() {
e.Close()
if err := os.RemoveAll(etcdDir); err != nil {
t.Fatal(err)
}
}()
<-e.Server.ReadyNotify()
port := strings.Split(e.Clients[0].Addr().String(), ":")[1]
endpoint := "127.0.0.1:" + port
var mu sync.Mutex
selectedCount := 0
var wg sync.WaitGroup
selectAndDone := func(c *client.Etcd) {
defer wg.Done()
selected, err := c.Select()
if err != nil {
panic(err)
}
if selected {
mu.Lock()
selectedCount++
mu.Unlock()
err = c.Done()
if err != nil {
t.Fatal(err)
}
}
}
c0 := client.NewEtcd(endpoint)
c1 := client.NewEtcd(endpoint)
c2 := client.NewEtcd(endpoint)
c3 := client.NewEtcd(endpoint)
wg.Add(3)
go selectAndDone(c0)
go selectAndDone(c1)
go selectAndDone(c2)
wg.Wait()
// simulate trainer crashed and restarted after the
// initialization process.
wg.Add(1)
go selectAndDone(c3)
wg.Wait()
mu.Lock()
if selectedCount != 1 {
t.Fatal("selected count wrong:", selectedCount)
}
mu.Unlock()
err = c0.Close()
if err != nil {
t.Fatal(err)
}
err = c1.Close()
if err != nil {
t.Fatal(err)
}
err = c2.Close()
if err != nil {
t.Fatal(err)
}
err = c3.Close()
if err != nil {
t.Fatal(err)
}
}
...@@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES}) ...@@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api paddle_proto paddle_trainer_lib) add_dependencies(paddle_api paddle_proto paddle_trainer_lib)
INCLUDE(${SWIG_USE_FILE}) INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle)
FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
...@@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle ...@@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle
${START_END} ${START_END}
) )
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle
COMMAND ${CMAKE_COMMAND} -E touch .timestamp COMMAND ${CMAKE_COMMAND} -E touch .timestamp
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle
DEPENDS _swig_paddle DEPENDS _swig_paddle
) )
# TODO(yuyang18) : make wheel name calculated by cmake # TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so)
if(WITH_TESTING) if(WITH_TESTING)
IF(NOT PY_PIP_FOUND) IF(NOT PY_PIP_FOUND)
......
...@@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( ...@@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
config->m->getConfig(), pserverSpec, useEtcd)); config->m->getConfig(), pserverSpec, useEtcd));
return updater; return updater;
#else #else
throw UnsupportError(); throw UnsupportError("not compiled with WITH_GOLANG");
#endif #endif
} }
......
...@@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args, ...@@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args,
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
paddle_error paddle_arguments_set_frame_shape(paddle_arguments args,
uint64_t ID,
uint64_t frameHeight,
uint64_t frameWidth) {
if (args == nullptr) return kPD_NULLPTR;
auto a = castArg(args);
if (ID >= a->args.size()) return kPD_OUT_OF_RANGE;
a->args[ID].setFrameHeight(frameHeight);
a->args[ID].setFrameWidth(frameWidth);
return kPD_NO_ERROR;
}
paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args,
uint64_t ID, uint64_t ID,
uint32_t nestedLevel, uint32_t nestedLevel,
......
...@@ -111,6 +111,20 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, ...@@ -111,6 +111,20 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args,
uint64_t ID, uint64_t ID,
paddle_ivector ids); paddle_ivector ids);
/**
* @brief paddle_arguments_set_frame_shape Set the fram size of one argument
* in array, which index is `ID`.
* @param [in] args arguments array
* @param [in] ID array index
* @param [in] frameHeight maximum height of input images
* @param [in] frameWidth maximum width of input images
* @return paddle_error
*/
PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args,
uint64_t ID,
uint64_t frameHeight,
uint64_t frameWidth);
/** /**
* @brief PDArgsSetSequenceStartPos Set sequence start position vector of one * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one
* argument in array, which index is `ID`. * argument in array, which index is `ID`.
......
...@@ -7,14 +7,17 @@ ...@@ -7,14 +7,17 @@
do { \ do { \
paddle_error __err__ = stmt; \ paddle_error __err__ = stmt; \
if (__err__ != kPD_NO_ERROR) { \ if (__err__ != kPD_NO_ERROR) { \
fprintf(stderr, "Invoke paddle error %d \n" #stmt, __err__); \ fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \
exit(__err__); \ exit(__err__); \
} \ } \
} while (0) } while (0)
void* read_config(const char* filename, long* size) { void* read_config(const char* filename, long* size) {
FILE* file = fopen(filename, "r"); FILE* file = fopen(filename, "r");
if (file == NULL) return NULL; if (file == NULL) {
fprintf(stderr, "Open %s error\n", filename);
return NULL;
}
fseek(file, 0L, SEEK_END); fseek(file, 0L, SEEK_END);
*size = ftell(file); *size = ftell(file);
fseek(file, 0L, SEEK_SET); fseek(file, 0L, SEEK_SET);
......
...@@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference( ...@@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference(
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
paddle_error paddle_gradient_machine_create_for_inference_with_parameters(
paddle_gradient_machine* machine, void* mergedModel, uint64_t size) {
if (mergedModel == nullptr) return kPD_NULLPTR;
std::istringstream is(std::string(static_cast<char*>(mergedModel), size));
int64_t modelConfigSize = 0;
is.read((char*)(&modelConfigSize), sizeof(modelConfigSize));
std::string modelConfigProtobuf;
modelConfigProtobuf.resize(modelConfigSize);
is.read(&modelConfigProtobuf[0], modelConfigSize);
paddle::TrainerConfig config;
if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) {
return kPD_PROTOBUF_ERROR;
}
auto ptr = new paddle::capi::CGradientMachine();
ptr->machine.reset(paddle::GradientMachine::create(
config.model_config(), CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE}));
std::vector<paddle::ParameterPtr>& parameters = ptr->machine->getParameters();
for (auto& para : parameters) {
para->load(is);
}
*machine = ptr;
return kPD_NO_ERROR;
}
paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) {
delete cast(machine); delete cast(machine);
return kPD_NO_ERROR; return kPD_NO_ERROR;
......
...@@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine; ...@@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine;
PD_API paddle_error paddle_gradient_machine_create_for_inference( PD_API paddle_error paddle_gradient_machine_create_for_inference(
paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); paddle_gradient_machine* machine, void* modelConfigProtobuf, int size);
/**
* @brief Create a gradient machine used for model inference, using config with
* parameters which is generated by `paddle merge_model`.
* @param [out] machine that used for model inference.
* @param [in] mergedModel
* @param [in] size
* @return paddle_error
*/
PD_API paddle_error
paddle_gradient_machine_create_for_inference_with_parameters(
paddle_gradient_machine* machine, void* mergedModel, uint64_t size);
/** /**
* @brief Load parameter from disk. * @brief Load parameter from disk.
* @param machine Gradient Machine. * @param machine Gradient Machine.
......
...@@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC ...@@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC
${PADDLE_CAPI_INC_PATH}) ${PADDLE_CAPI_INC_PATH})
target_link_libraries(capi_test_gradientMachine paddle_capi) target_link_libraries(capi_test_gradientMachine paddle_capi)
add_test(NAME capi_test_gradientMachine add_test(NAME capi_test_gradientMachine
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/capi/tests) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests)
...@@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) ...@@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc) cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc) cc_library(scope SRCS scope.cc)
...@@ -32,6 +35,11 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc. ...@@ -32,6 +35,11 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.
# Generate an empty __init__.py to make framework_py_proto as a valid python module. # Generate an empty __init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto
COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/
COMMENT "Copy generated python proto into directory paddle/v2/framework/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_test(backward_test SRCS backward_test.cc DEPS backward)
...@@ -40,12 +48,16 @@ if(WITH_PYTHON) ...@@ -40,12 +48,16 @@ if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc SRCS pybind.cc
DEPS pybind python backward DEPS pybind python backward
fc_op
sgd_op sgd_op
add_op add_op
mul_op
rowwise_add_op
sigmoid_op
softmax_op
mean_op mean_op
cross_entropy_op cross_entropy_op
recurrent_op recurrent_op
uniform_random_op uniform_random_op
gaussian_random_op
fill_zeros_like_op) fill_zeros_like_op)
endif(WITH_PYTHON) endif(WITH_PYTHON)
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#pragma once #pragma once
#include <boost/variant.hpp>
#include <functional> #include <functional>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -24,6 +23,7 @@ limitations under the License. */ ...@@ -24,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/attribute.pb.h" #include "paddle/framework/attribute.pb.h"
#include "paddle/framework/op_desc.pb.h" #include "paddle/framework/op_desc.pb.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/variant.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -133,8 +133,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -133,8 +133,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp); std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
for (std::string& grad_input : grad_op->inputs_) { for (std::string& grad_input : grad_op->inputs_) {
if (no_grad_names.count(grad_input)) { if (no_grad_names.count(grad_input)) {
std::string prefix = // +1 for \0
grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); std::string prefix = grad_input.substr(
0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
grad_input = prefix + kZeroVarSuffix; grad_input = prefix + kZeroVarSuffix;
// If part of input gradient of that operator is not calculated, fill // If part of input gradient of that operator is not calculated, fill
...@@ -167,7 +168,7 @@ std::shared_ptr<OperatorBase> Backward( ...@@ -167,7 +168,7 @@ std::shared_ptr<OperatorBase> Backward(
std::unordered_set<std::string> no_grad_names; std::unordered_set<std::string> no_grad_names;
no_grad_names.reserve(no_grad_vars.size()); no_grad_names.reserve(no_grad_vars.size());
no_grad_names.insert(kEmptyVarName + kGradVarSuffix); no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
for (auto& name : no_grad_vars) { for (auto& name : no_grad_vars) {
no_grad_names.insert(name + kGradVarSuffix); no_grad_names.insert(name + kGradVarSuffix);
......
...@@ -171,10 +171,10 @@ TEST(Backward, simple_op_grad) { ...@@ -171,10 +171,10 @@ TEST(Backward, simple_op_grad) {
ASSERT_EQ(4UL, gop->inputs_.size()); ASSERT_EQ(4UL, gop->inputs_.size());
ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]);
ASSERT_EQ("rowwise_add_grad", gop->type_); ASSERT_EQ("rowwise_add_grad", gop->type_);
ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]);
ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]);
ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X")));
} }
TEST(Backward, simple_op_not_need_grad) { TEST(Backward, simple_op_not_need_grad) {
...@@ -182,7 +182,7 @@ TEST(Backward, simple_op_not_need_grad) { ...@@ -182,7 +182,7 @@ TEST(Backward, simple_op_not_need_grad) {
ASSERT_NE(fwd, nullptr); ASSERT_NE(fwd, nullptr);
auto gop = f::Backward(*fwd, {"X"}); auto gop = f::Backward(*fwd, {"X"});
ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(),
"X" + f::kGradVarSuffix), f::GradVarName("X")),
gop->outputs_.end()); gop->outputs_.end());
auto no_input_gop = f::Backward(*fwd, {"X", "b"}); auto no_input_gop = f::Backward(*fwd, {"X", "b"});
...@@ -250,18 +250,18 @@ TEST(Backward, net_input_of_network_not_need_grad) { ...@@ -250,18 +250,18 @@ TEST(Backward, net_input_of_network_not_need_grad) {
all_output.erase(f::kEmptyVarName); all_output.erase(f::kEmptyVarName);
for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end());
} }
// Not Generated X // Not Generated X
ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end());
ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_EQ(2UL, bwd_net->ops_.size());
ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get()); auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(3UL, first_fc_grad->ops_.size());
ASSERT_EQ(f::kEmptyVarName, ASSERT_EQ(f::kEmptyVarName,
first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); first_fc_grad->ops_[2]->Output(f::GradVarName("A")));
} }
TEST(Backward, net_shared_weight) { TEST(Backward, net_shared_weight) {
...@@ -313,15 +313,15 @@ TEST(Backward, op_part_of_output_are_not_need) { ...@@ -313,15 +313,15 @@ TEST(Backward, op_part_of_output_are_not_need) {
ASSERT_EQ(1UL, fill_zero.inputs_.size()); ASSERT_EQ(1UL, fill_zero.inputs_.size());
ASSERT_EQ("Z", fill_zero.inputs_[0]); ASSERT_EQ("Z", fill_zero.inputs_[0]);
ASSERT_EQ(1UL, fill_zero.outputs_.size()); ASSERT_EQ(1UL, fill_zero.outputs_.size());
ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]);
auto &d_many_out = *net->ops_[1]; auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ("many_output_op_grad", d_many_out.type_);
ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG
ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix,
ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); d_many_out.Input(f::GradVarName("z")));
ASSERT_EQ("X" + f::kGradVarSuffix, ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y")));
d_many_out.Output("x" + f::kGradVarSuffix)); ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x")));
} }
TEST(Backward, op_part_of_input_are_not_need) { TEST(Backward, op_part_of_input_are_not_need) {
...@@ -331,10 +331,9 @@ TEST(Backward, op_part_of_input_are_not_need) { ...@@ -331,10 +331,9 @@ TEST(Backward, op_part_of_input_are_not_need) {
ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.type_, "mul_grad");
ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
ASSERT_EQ(grad_mul.outputs_.size(), 2UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName);
ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b"));
ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
"out" + f::kGradVarSuffix);
ASSERT_EQ(grad_mul.Input("A"), "a"); ASSERT_EQ(grad_mul.Input("A"), "a");
ASSERT_EQ(grad_mul.Input("B"), "b"); ASSERT_EQ(grad_mul.Input("B"), "b");
ASSERT_EQ(grad_mul.Input("Out"), "out"); ASSERT_EQ(grad_mul.Input("Out"), "out");
......
...@@ -14,12 +14,12 @@ limitations under the License. */ ...@@ -14,12 +14,12 @@ limitations under the License. */
#pragma once #pragma once
#include <boost/variant.hpp>
#include <initializer_list> #include <initializer_list>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include "paddle/framework/dim.h" #include "paddle/framework/dim.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/variant.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include <memory>
namespace paddle {
namespace framework {
namespace details {
using LOD = LODTensor::LOD;
std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
size_t level_end) {
auto new_lod = std::make_shared<LOD>();
new_lod->reserve(level_end - level_begin);
for (size_t i = level_begin; i < level_end; i++) {
new_lod->emplace_back(lod[i]);
}
return new_lod;
}
std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
size_t elem_end, bool tensor_shared) {
// slice the lod.
auto new_lod = std::make_shared<LOD>();
new_lod->reserve(lod.size() - level);
auto start = lod.at(level)[elem_begin];
auto end = lod.at(level)[elem_end];
for (auto it = lod.begin() + level; it != lod.end(); it++) {
auto it_begin = std::find(it->begin(), it->end(), start);
auto it_end = std::find(it_begin, it->end(), end);
PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
new_lod->emplace_back(it_begin, it_end + 1);
if (!tensor_shared) {
// reset offset if tensor is copyed and sliced.
std::transform(new_lod->back().begin(), new_lod->back().end(),
new_lod->back().begin(),
[start](int v) { return v - start; });
PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD");
}
}
return new_lod;
}
} // namespace details
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
namespace paddle {
namespace framework {
namespace details {
/*
* Slice levels from LOD.
*
* @lod: LOD to slice.
* @level_begin: level to begin slice.
* @level_end: level to end slice.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level_begin, size_t level_end);
/*
* Slice elements from a level of LOD.
*
* @lod: LOD to slice.
* @level: which level to slice.
* @elem_begin: element's index to begin slice.
* @elem_end: element's index to end slice.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level, size_t elem_begin,
size_t elem_end, bool tensor_shared);
} // namespace details
} // namespace framework
} // namespace paddle
...@@ -83,21 +83,19 @@ TEST(GradOpBuilder, MutiInOut) { ...@@ -83,21 +83,19 @@ TEST(GradOpBuilder, MutiInOut) {
EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
std::vector<std::string>({"out2_1", "out2_2"})); std::vector<std::string>({"out2_1", "out2_2"}));
EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")),
"out1" + f::kGradVarSuffix); f::GradVarName("out1"));
EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")),
std::vector<std::string>( std::vector<std::string>(
{"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); {f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
"in1" + f::kGradVarSuffix); EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), std::vector<std::string>({f::GradVarName("in2_1"),
std::vector<std::string>({"in2_1" + f::kGradVarSuffix, f::GradVarName("in2_2"),
"in2_2" + f::kGradVarSuffix, f::GradVarName("in2_3")}));
"in2_3" + f::kGradVarSuffix})); EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3"));
EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix),
"in3" + f::kGradVarSuffix);
} }
TEST(GradOpBuilder, IOIgnoredInGradient) { TEST(GradOpBuilder, IOIgnoredInGradient) {
...@@ -119,19 +117,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) { ...@@ -119,19 +117,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) {
EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
std::vector<std::string>({"out1_1", "out1_2"})); std::vector<std::string>({"out1_1", "out1_2"}));
EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName);
EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")),
std::vector<std::string>( std::vector<std::string>(
{"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); {f::GradVarName("out1_1"), f::GradVarName("out1_2")}));
EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")),
"out2" + f::kGradVarSuffix); f::GradVarName("out2"));
ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
"in1" + f::kGradVarSuffix); EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
std::vector<std::string>( std::vector<std::string>(
{"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); {f::GradVarName("in2_1"), f::GradVarName("in2_2")}));
EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")),
std::vector<std::string>( std::vector<std::string>(
{"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); {f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
} }
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include <glog/logging.h>
namespace paddle {
namespace framework {
LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
// slice levels just need to update LOD info, each level will contains the
// whole tensor_, so no need to modify tensor_.
return LODTensor(tensor_, new_lod);
}
LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin,
size_t elem_end) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem_begin < NumElements(level),
"element begin [%d] out of range [%d]", elem_begin,
NumElements(level));
PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
"element end [%d] out of range [%d]", elem_end,
NumElements(level));
auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
true /*tensor_shared*/);
// slice elements just need to update LOD info, because offsets are not
// changed, so the original tensor_ can be reused.
return LODTensor(tensor_, new_lod);
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#if (!PADDLE_ONLY_CPU)
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#endif
#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
/*
* LODTensor (Level of details Tensor)
* see https://en.wikipedia.org/wiki/Level_of_details for reference.
*/
class LODTensor {
public:
// Level save offsets of each unit.
#ifdef PADDLE_ONLY_CPU
using Level = std::vector<size_t>;
#else
using Level = thrust::device_vector<size_t>;
#endif
// LOD stores offsets of each level of units, the largest units level first,
// then the smaller units level. Each Level stores the offsets of units in
// Tesor.
typedef std::vector<Level> LOD;
LODTensor() {}
LODTensor(const std::shared_ptr<Tensor> &tensor,
const std::shared_ptr<LOD> &lod) {
Reset(tensor, lod);
}
void Reset(const std::shared_ptr<Tensor> &tensor,
const std::shared_ptr<LOD> &lod) {
tensor_ = tensor;
lod_start_pos_ = lod;
}
/*
* Get a element from LOD.
*/
size_t lod_element(size_t level, size_t elem) const {
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem < NumElements(level),
"element begin [%d] out of range [%d]", elem,
NumElements(level));
return (*lod_start_pos_)[level][elem];
}
/*
* Number of LODTensor's levels, each level has units of data, for example,
* in the sentence's view, article, paragraph, sentence are 3 levels.
*/
size_t NumLevels() const {
return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
}
/*
* Number of elements in a level.
*/
size_t NumElements(size_t level = 0) const {
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
// the last offset is the end of last element
return lod_start_pos_->at(level).size() - 1;
}
/*
* Slice of levels[level_begin:level_end], with tensor copied.
*/
template <typename T>
LODTensor SliceCopied(size_t level_begin, size_t level_end,
const platform::Place &dst_place) const;
/*
* Slice of levels[level_begin:level_end], with tensor shared.
*/
LODTensor SliceShared(size_t level_begin, size_t level_end) const;
/*
* Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
* @note: low performance in slice lod_start_pos_.
*/
template <typename T>
LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
const platform::Place &dst_place) const;
/*
* Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
* @note: low performance in slice lod_start_pos_.
*/
LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;
/*
* Copy other's lod_start_pos_, to share LOD info.
* @note: the LOD info should not be changed.
*/
void ShareLOD(const LODTensor &other) {
lod_start_pos_ = other.lod_start_pos_;
}
/*
* Copy other's lod_start_pos_'s content, free to mutate.
*/
void CopyLOD(const LODTensor &other) {
lod_start_pos_ = std::make_shared<LOD>(*other.lod_start_pos_);
}
/*
* Determine whether LODTensor has a valid LOD info.
*/
bool HasLOD() const { return bool(lod_start_pos_); }
LOD *lod() const { return lod_start_pos_.get(); }
std::shared_ptr<Tensor> &tensor() { return tensor_; }
Tensor *raw_tensor() { return tensor_.get(); }
private:
std::shared_ptr<LOD> lod_start_pos_;
std::shared_ptr<Tensor> tensor_;
};
} // namespace framework
} // namespace paddle
#include "paddle/framework/lod_tensor_impl.h"
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/details/lod_tensor.h"
namespace paddle {
namespace framework {
template <typename T>
LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
const platform::Place &dst_place) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
auto new_tensor = std::make_shared<Tensor>();
new_tensor->CopyFrom<T>(*tensor_, dst_place);
return LODTensor(new_tensor, new_lod);
}
template <typename T>
LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
size_t elem_end,
const platform::Place &dst_place) const {
PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem_begin < NumElements(level),
"element begin [%d] out of range [%d]", elem_begin,
NumElements(level));
PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
"element end [%d] out of range [%d]", elem_end,
NumElements(level));
auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
false /*tensor_shared*/);
auto start_idx = new_lod->front().front();
auto end_idx = new_lod->front().back() - 1 /*the next element's start*/;
auto sliced_tensor = tensor_->Slice<T>(start_idx, end_idx);
auto new_tensor = std::make_shared<Tensor>();
new_tensor->CopyFrom<T>(sliced_tensor, dst_place);
return LODTensor(new_tensor, new_lod);
}
} // namespace framework
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/framework/lod_tensor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
namespace paddle {
namespace framework {
class LODTensorTester : public ::testing::Test {
public:
virtual void SetUp() override {
lod_tensor.reset(new LODTensor);
// tensor's batch_size: 30
// 3 levels
// 0 10 20
// 0 5 10 15 20
// 0 2 5 7 10 12 15 20
auto lod = std::make_shared<LODTensor::LOD>();
lod->push_back(std::vector<size_t>{0, 10, 20});
lod->push_back(std::vector<size_t>{0, 5, 10, 15, 20});
lod->push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
auto tensor = std::make_shared<Tensor>();
tensor->Resize({20 /*batch size*/, 128 /*dim*/});
// malloc memory
tensor->mutable_data<float>(place);
lod_tensor->Reset(tensor, lod);
}
protected:
std::unique_ptr<LODTensor> lod_tensor;
platform::CPUPlace place;
};
TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); }
TEST_F(LODTensorTester, NumElements) {
ASSERT_EQ(lod_tensor->NumElements(0), 2UL);
ASSERT_EQ(lod_tensor->NumElements(1), 4UL);
ASSERT_EQ(lod_tensor->NumElements(2), 8UL);
}
TEST_F(LODTensorTester, SliceShared_Level) {
// slice 1 level
for (size_t level = 0; level < 3UL; ++level) {
auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1);
ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level));
ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
}
// slice 2 level
for (size_t level = 0; level < 2UL; ++level) {
auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level));
ASSERT_EQ(new_lod_tensor.NumElements(1),
lod_tensor->NumElements(level + 1));
ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
}
}
TEST_F(LODTensorTester, SliceCopied_Level) {
// slice 1 level
for (size_t level = 0; level < 3UL; ++level) {
auto new_lod_tensor =
lod_tensor->SliceCopied<float>(level, level + 1, place);
ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level));
// ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
// TODO(superjom) add tensor comparation here.
}
// slice 2 level
for (size_t level = 0; level < 2UL; ++level) {
auto new_lod_tensor =
lod_tensor->SliceCopied<float>(level, level + 2, place);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level));
ASSERT_EQ(new_lod_tensor.NumElements(1),
lod_tensor->NumElements(level + 1));
// ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
// TODO(superjom) add tensor comparation here.
}
}
TEST_F(LODTensorTester, SliceShared_Element) {
size_t level = 0;
auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL);
ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor());
level = 1;
new_lod_tensor = lod_tensor->SliceShared(level, 0, 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor());
}
TEST_F(LODTensorTester, SliceCopied_Element) {
size_t level = 0;
auto new_lod_tensor = lod_tensor->SliceCopied<float>(level, 0, 2, place);
ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL);
ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor());
level = 1;
new_lod_tensor = lod_tensor->SliceCopied<float>(level, 0, 2, place);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor());
level = 1;
// LOD is
// 0 5 10
// 0 2 5 7 10
new_lod_tensor = lod_tensor->SliceCopied<float>(level, 1, 3, place);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL);
ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL);
ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL);
ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL);
ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL);
ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL);
// TODO(superjom) compare the content of these tensors
}
TEST_F(LODTensorTester, ShareLOD) {
LODTensor new_lod_tensor;
new_lod_tensor.ShareLOD(*lod_tensor);
ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod());
}
TEST_F(LODTensorTester, CopyLOD) {
LODTensor new_lod_tensor;
new_lod_tensor.CopyLOD(*lod_tensor);
ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod());
}
} // namespace framework
} // namespace paddle
...@@ -307,22 +307,45 @@ class OpRegistry { ...@@ -307,22 +307,45 @@ class OpRegistry {
} }
}; };
class Registrar {
public:
// In our design, various kinds of classes, e.g., operators and kernels, have
// their corresponding registry and registrar. The action of registration is
// in the constructor of a global registrar variable, which, however, are not
// used in the code that calls package framework, and would be removed from
// the generated binary file by the linker. To avoid such removal, we add
// Touch to all registrar classes and make USE_OP macros to call this
// method. So, as long as the callee code calls USE_OP, the global
// registrar variable won't be removed by the linker.
void Touch() {}
};
template <typename OpType, typename ProtoMakerType> template <typename OpType, typename ProtoMakerType>
class OpRegisterHelper { class OpRegistrar : public Registrar {
public: public:
explicit OpRegisterHelper(const char* op_type) { explicit OpRegistrar(const char* op_type) {
OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type); OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
} }
}; };
template <typename GradOpType> template <typename GradOpType>
class GradOpRegisterHelper { class GradOpRegistrar : public Registrar {
public: public:
GradOpRegisterHelper(const char* op_type, const char* grad_op_type) { GradOpRegistrar(const char* op_type, const char* grad_op_type) {
OpRegistry::RegisterGradOp<GradOpType>(op_type, grad_op_type); OpRegistry::RegisterGradOp<GradOpType>(op_type, grad_op_type);
} }
}; };
template <typename PlaceType, typename KernelType>
class OpKernelRegistrar : public Registrar {
public:
explicit OpKernelRegistrar(const char* op_type) {
OperatorWithKernel::OpKernelKey key;
key.place_ = PlaceType();
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType);
}
};
/** /**
* check if MACRO is used in GLOBAL NAMESPACE. * check if MACRO is used in GLOBAL NAMESPACE.
*/ */
...@@ -333,97 +356,121 @@ class GradOpRegisterHelper { ...@@ -333,97 +356,121 @@ class GradOpRegisterHelper {
msg) msg)
/** /**
* Macro to Register Operator. * Macro to register Operator.
*/ */
#define REGISTER_OP(__op_type, __op_class, __op_maker_class) \ #define REGISTER_OP(op_type, op_class, op_maker_class) \
STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
"REGISTER_OP must be in global namespace"); \ __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \
static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \ static ::paddle::framework::OpRegistrar<op_class, op_maker_class> \
__op_register_##__op_type##__(#__op_type); \ __op_registrar_##op_type##__(#op_type); \
int __op_register_##__op_type##_handle__() { return 0; } int TouchOpRegistrar_##op_type() { \
__op_registrar_##op_type##__.Touch(); \
return 0; \
}
/** /**
* Macro to Register Gradient Operator. * Macro to register Gradient Operator.
*/ */
#define REGISTER_GRADIENT_OP(__op_type, __grad_op_type, __grad_op_class) \ #define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_gradient_op__##__op_type##__grad_op_type, \ __reg_gradient_op__##op_type##_##grad_op_type, \
"REGISTER_GRADIENT_OP must be in global namespace"); \ "REGISTER_GRADIENT_OP must be called in global namespace"); \
static ::paddle::framework::GradOpRegisterHelper<__grad_op_class> \ static ::paddle::framework::GradOpRegistrar<grad_op_class> \
__op_gradient_register_##__op_type##__grad_op_type##__(#__op_type, \ __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \
#__grad_op_type); \ #grad_op_type); \
int __op_gradient_register_##__op_type##__grad_op_type##_handle__() { \ int TouchOpGradientRegistrar_##op_type() { \
__op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \
return 0; \ return 0; \
} }
/** /**
* Macro to Forbid user register Gradient Operator. * Macro to register OperatorKernel.
*/ */
#define NO_GRADIENT(__op_type) \ #define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_gradient_op__##__op_type##__op_type##_grad, \ __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \
"NO_GRADIENT must be in global namespace") "REGISTER_OP_KERNEL must be called in global namespace"); \
static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
__op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \
int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \
__op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \
return 0; \
}
/** /**
* Macro to Register OperatorKernel. * Macro to Forbid user register Gradient Operator.
*/ */
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \ #define NO_GRADIENT(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##type##_##DEVICE_TYPE##__, \ __reg_gradient_op__##op_type##_##op_type##_grad, \
"REGISTER_OP_KERNEL must be in global namespace"); \ "NO_GRADIENT must be called in global namespace")
struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \
__op_kernel_register__##type##__##DEVICE_TYPE##__() { \ #define REGISTER_OP_GPU_KERNEL(op_type, ...) \
::paddle::framework::OperatorWithKernel::OpKernelKey key; \ REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
key.place_ = PlaceType(); \
::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ #define REGISTER_OP_CPU_KERNEL(op_type, ...) \
.reset(new __VA_ARGS__()); \ REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
} \
}; \
static __op_kernel_register__##type##__##DEVICE_TYPE##__ \
__reg_kernel_##type##__##DEVICE_TYPE##__; \
int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }
// (type, KernelType)
#define REGISTER_OP_GPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
// (type, KernelType)
#define REGISTER_OP_CPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/** /**
* Macro to mark what Operator and Kernel we will use and tell the compiler to * Macro to mark what Operator and Kernel we will use and tell the compiler to
* link them into target. * link them into target.
*/ */
#define USE_OP_WITHOUT_KERNEL(op_type) \ #define USE_OP_ITSELF(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_itself_##op_type, \
"USE_OP_ITSELF must be called in global namespace"); \
extern int TouchOpRegistrar_##op_type(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type()
// TODO(fengjiayi): Most ops' gradient op have not been compeleted. So we use
// `NO_GRAD` to disable micro USE_OP_GRADIENT(op_type). Otherwise the code can't
// be compiled. `NO_GRAD` should be removed after all gradient ops are
// compeleted.
#define NO_GRAD
#ifndef NO_GRAD
#define USE_OP_GRADIENT(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_without_kernel_##op_type, \ __use_op_gradient_##op_type, \
"USE_OP_WITHOUT_KERNEL must be in global namespace"); \ "USE_OP_GRADIENT must be called in global namespace"); \
extern int __op_register_##op_type##_handle__(); \ extern int TouchOpGradientRegistrar_##op_type(); \
static int __use_op_ptr_##op_type##_without_kernel__ \ static int use_op_gradient_##op_type##_ __attribute__((unused)) = \
__attribute__((unused)) = __op_register_##op_type##_handle__() TouchOpGradientRegistrar_##op_type()
#else
#define USE_OP_GRADIENT(op_type)
#endif
#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ #define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \
"USE_OP_KERNEL must be in global namespace"); \ "USE_OP_DEVICE_KERNEL must be in global namespace"); \
extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \ extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \
static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__ \ static int use_op_kernel_##op_type##_##DEVICE_TYPE##_ \
__attribute__((unused)) = \ __attribute__((unused)) = \
__op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
// use Operator with only cpu kernel. // TODO(fengjiayi): The following macros seems ugly, do we have better method?
#define USE_OP_CPU(op_type) \
USE_OP_WITHOUT_KERNEL(op_type); \
USE_OP_KERNEL(op_type, CPU)
#ifdef PADDLE_ONLY_CPU #ifdef PADDLE_ONLY_CPU
#define USE_OP(op_type) USE_OP_CPU(op_type) #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
#else #else
#define USE_OP(op_type) \ #define USE_OP_KERNEL(op_type) \
USE_OP_CPU(op_type); \ USE_OP_DEVICE_KERNEL(op_type, CPU); \
USE_OP_KERNEL(op_type, GPU) USE_OP_DEVICE_KERNEL(op_type, GPU)
#endif #endif
#define USE_NO_GRAD_OP(op_type) \
USE_OP_ITSELF(op_type); \
USE_OP_KERNEL(op_type)
#define USE_CPU_OP(op_type) \
USE_OP_ITSELF(op_type); \
USE_OP_DEVICE_KERNEL(op_type, CPU); \
USE_OP_GRADIENT(op_type)
#define USE_OP(op_type) \
USE_NO_GRAD_OP(op_type); \
USE_OP_GRADIENT(op_type)
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <boost/variant.hpp>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -27,25 +26,26 @@ limitations under the License. */ ...@@ -27,25 +26,26 @@ limitations under the License. */
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "paddle/platform/variant.h"
#include "paddle/utils/Error.h" #include "paddle/utils/Error.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
/// If a variable is a empty variable, that name will be used. /// If a variable is a empty variable, that name will be used.
const std::string kEmptyVarName = "@EMPTY@"; constexpr char kEmptyVarName[] = "@EMPTY@";
/// If a variable is a temporary variable, that name will be set in Python, /// If a variable is a temporary variable, that name will be set in Python,
/// but it will be convert to a unique name in scope after OpCreator. /// but it will be convert to a unique name in scope after OpCreator.
const std::string kTempVarName = "@TEMP@"; constexpr char kTempVarName[] = "@TEMP@";
/// If a variable's name has a certain suffix, it means that the /// If a variable's name has a certain suffix, it means that the
/// variable is the gradient of another varibale. /// variable is the gradient of another varibale.
/// e.g. Variable "x@GRAD" is the gradient of varibale "x". /// e.g. Variable "x@GRAD" is the gradient of varibale "x".
const std::string kGradVarSuffix = "@GRAD"; constexpr char kGradVarSuffix[] = "@GRAD";
/// Variables with this suffix are supposed to be filled up with zeros. /// Variables with this suffix are supposed to be filled up with zeros.
const std::string kZeroVarSuffix = "@ZERO"; constexpr char kZeroVarSuffix[] = "@ZERO";
inline std::string GradVarName(const std::string& var_name) { inline std::string GradVarName(const std::string& var_name) {
return var_name + kGradVarSuffix; return var_name + kGradVarSuffix;
...@@ -95,16 +95,21 @@ class OperatorBase { ...@@ -95,16 +95,21 @@ class OperatorBase {
//! Get a input with argument's name described in `op_proto` //! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const; const std::string& Input(const std::string& name) const;
//! Get a input which has multiple variables. //! Get a input which has multiple variables.
//! TODO add a vector_view to prevent memory copy. //! TODO add a vector_view to prevent memory copy.
std::vector<std::string> Inputs(const std::string& name) const; std::vector<std::string> Inputs(const std::string& name) const;
//! Get a output with argument's name described in `op_proto` //! Get a output with argument's name described in `op_proto`
const std::string& Output(const std::string& name) const; const std::string& Output(const std::string& name) const;
//! Get an output which has multiple variables. //! Get an output which has multiple variables.
//! TODO add a vector_view to prevent memory copy. //! TODO add a vector_view to prevent memory copy.
std::vector<std::string> Outputs(const std::string& name) const; std::vector<std::string> Outputs(const std::string& name) const;
const std::string Type() const { return type_; }
const std::vector<std::string> Inputs() const { return inputs_; }
const std::vector<std::string> Outputs() const { return outputs_; }
const AttributeMap& Attrs() const { return attrs_; }
public: public:
std::string type_; std::string type_;
// NOTE: in case of OpGrad, inputs_ contains: // NOTE: in case of OpGrad, inputs_ contains:
...@@ -120,10 +125,10 @@ class OperatorBase { ...@@ -120,10 +125,10 @@ class OperatorBase {
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_; std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
}; };
class OperatorContext { class InferShapeContext {
public: public:
OperatorContext(const OperatorBase* op, const Scope& scope) InferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(*op), scope_(scope) {} : op_(op), scope_(scope) {}
size_t InputSize() const { return op_.inputs_.size(); } size_t InputSize() const { return op_.inputs_.size(); }
...@@ -234,12 +239,6 @@ class OperatorContext { ...@@ -234,12 +239,6 @@ class OperatorContext {
const Scope& scope_; const Scope& scope_;
}; };
class InferShapeContext : public OperatorContext {
public:
InferShapeContext(const OperatorBase* op, const Scope& scope)
: OperatorContext(op, scope) {}
};
template <typename T> template <typename T>
struct EigenDeviceConverter; struct EigenDeviceConverter;
...@@ -255,11 +254,11 @@ struct EigenDeviceConverter<platform::GPUPlace> { ...@@ -255,11 +254,11 @@ struct EigenDeviceConverter<platform::GPUPlace> {
}; };
#endif #endif
class ExecutionContext : public OperatorContext { class ExecutionContext : public InferShapeContext {
public: public:
ExecutionContext(const OperatorBase* op, const Scope& scope, ExecutionContext(const OperatorBase& op, const Scope& scope,
const platform::DeviceContext* device_context) const platform::DeviceContext* device_context)
: OperatorContext(op, scope), device_context_(device_context) {} : InferShapeContext(op, scope), device_context_(device_context) {}
template <typename PlaceType, template <typename PlaceType,
typename DeviceType = typename DeviceType =
...@@ -311,13 +310,13 @@ class OperatorWithKernel : public OperatorBase { ...@@ -311,13 +310,13 @@ class OperatorWithKernel : public OperatorBase {
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>; std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
void InferShape(const Scope& scope) const override { void InferShape(const Scope& scope) const override {
InferShape(InferShapeContext(this, scope)); InferShape(InferShapeContext(*this, scope));
} }
void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final { const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx));
} }
static std::unordered_map<std::string /* op_type */, OpKernelMap>& static std::unordered_map<std::string /* op_type */, OpKernelMap>&
......
...@@ -22,6 +22,7 @@ limitations under the License. */ ...@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "paddle/string/to_string.h"
#include "pybind11/numpy.h" #include "pybind11/numpy.h"
#include "pybind11/pybind11.h" #include "pybind11/pybind11.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
...@@ -29,17 +30,18 @@ limitations under the License. */ ...@@ -29,17 +30,18 @@ limitations under the License. */
namespace py = pybind11; namespace py = pybind11;
USE_OP(add_two); USE_OP(add_two);
USE_OP_CPU(onehot_cross_entropy); USE_CPU_OP(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc); USE_NO_GRAD_OP(sgd);
USE_OP(sgd);
USE_OP(mul); USE_OP(mul);
USE_OP(mean); USE_OP(mean);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
USE_OP(rowwise_add); USE_OP(rowwise_add);
USE_OP(fill_zeros_like); USE_OP(fill_zeros_like);
USE_OP_WITHOUT_KERNEL(recurrent_op); USE_OP_ITSELF(recurrent_op);
USE_OP(gaussian_random);
USE_OP(uniform_random); USE_OP(uniform_random);
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -205,9 +207,13 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -205,9 +207,13 @@ All parameter, weight, gradient are variables in Paddle.
}); });
// clang-format on // clang-format on
py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>()); py::class_<platform::GPUPlace>(m, "GPUPlace")
.def(py::init<int>())
.def("__str__", string::to_string<const platform::GPUPlace &>);
py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>()); py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
.def(py::init<>())
.def("__str__", string::to_string<const platform::CPUPlace &>);
py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base( py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
m, "Operator"); m, "Operator");
......
...@@ -18,6 +18,8 @@ limitations under the License. */ ...@@ -18,6 +18,8 @@ limitations under the License. */
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <typeindex> #include <typeindex>
#include <vector>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
...@@ -77,11 +79,11 @@ class Tensor { ...@@ -77,11 +79,11 @@ class Tensor {
inline const DDim& dims() const; inline const DDim& dims() const;
/*! Resize the dimensions of the memory block. */ /*! Resize the dimensions of the memory block. */
inline void Resize(const DDim& dims); inline Tensor& Resize(const DDim& dims);
/*! The internal of two tensors share the same memory block. */ /*! The internal of two tensors share the same memory block. */
template <typename T> template <typename T>
inline void ShareDataWith(const Tensor& src); inline Tensor& ShareDataWith(const Tensor& src);
/** /**
* @brief Copy the content of external tensor to a new place. * @brief Copy the content of external tensor to a new place.
......
...@@ -23,9 +23,11 @@ template <typename T> ...@@ -23,9 +23,11 @@ template <typename T>
inline void Tensor::check_memory_size() const { inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, PADDLE_ENFORCE_GE(
holder_->size(), product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data " "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory."); "first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
} }
template <typename T> template <typename T>
...@@ -78,9 +80,10 @@ inline T* Tensor::mutable_data(platform::Place place) { ...@@ -78,9 +80,10 @@ inline T* Tensor::mutable_data(platform::Place place) {
} }
template <typename T> template <typename T>
inline void Tensor::ShareDataWith(const Tensor& src) { inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
src.check_memory_size<T>(); src.check_memory_size<T>();
*this = src; *this = src;
return *this;
} }
template <typename T> template <typename T>
...@@ -136,7 +139,10 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { ...@@ -136,7 +139,10 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
return dst; return dst;
} }
inline void Tensor::Resize(const DDim& dims) { dims_ = dims; } inline Tensor& Tensor::Resize(const DDim& dims) {
dims_ = dims;
return *this;
}
inline const DDim& Tensor::dims() const { return dims_; } inline const DDim& Tensor::dims() const { return dims_; }
......
...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) { ...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
Tensor tt; Tensor tt;
tt.Resize(make_ddim({2, 3, 4})); tt.Resize({2, 3, 4});
DDim dims = tt.dims(); DDim dims = tt.dims();
ASSERT_EQ(arity(dims), 3); ASSERT_EQ(arity(dims), 3);
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
......
...@@ -38,10 +38,11 @@ if(WITH_GPU) ...@@ -38,10 +38,11 @@ if(WITH_GPU)
add_simple_unittest(RowConvOpTest) add_simple_unittest(RowConvOpTest)
add_simple_unittest(BlockExpandOpTest) add_simple_unittest(BlockExpandOpTest)
add_simple_unittest(CropOpTest) add_simple_unittest(CropOpTest)
add_simple_unittest(DepthwiseConvOpTest)
endif() endif()
add_simple_unittest(ConvOpTest)
add_simple_unittest(Im2ColTest) add_simple_unittest(Im2ColTest)
add_simple_unittest(GemmConvOpTest)
endif() endif()
add_style_check_target(paddle_function ${h_files}) add_style_check_target(paddle_function ${h_files})
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "Function.h"
#include "FunctionTest.h"
namespace paddle {
enum TestType {
kForwardTest = 0,
kBackwardInputTest = 1,
kBackwardFilterTest = 2,
};
template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest {
public:
ConvolutionTest(const std::string& conv1,
const std::string& conv2,
TestType type,
bool useGroups = true,
std::string algo = "auto") {
for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64}) {
if (inputChannels > outputChannels) break;
size_t groups;
if (!useGroups) {
groups = 1;
} else {
if (outputChannels % inputChannels != 0) continue;
groups = inputChannels;
}
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter;
if (groups > 1)
filter = TensorShape({groups,
outputChannels / groups,
inputChannels / groups,
filterSize,
filterSize});
else
filter = TensorShape({outputChannels,
inputChannels,
filterSize,
filterSize});
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter),
ADD_TO);
test.run();
}
}
}
}
}
}
}
}
}
};
// Mainly used to test cases where the height and width (input, filter)
// are not equal.
template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest2 {
public:
ConvolutionTest2(const std::string& conv1,
const std::string& conv2,
TestType type,
bool useGroups = true,
std::string algo = "auto") {
for (size_t batchSize : {16}) {
for (size_t inputHeight : {7, 31}) {
for (size_t inputWidth : {10, 54}) {
for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) {
for (size_t outputChannels : {7}) {
size_t groups;
if (!useGroups) {
groups = 1;
} else {
if (outputChannels % inputChannels != 0) continue;
groups = inputChannels;
}
size_t stride = 1;
size_t padding = 0;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) /
stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputHeight
<< " inputWidth=" << inputWidth
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterHeight
<< " filterWidth=" << filterWidth
<< " outputHeight=" << outputHeight
<< " outputWidth=" << outputWidth
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter;
if (groups > 1)
filter = TensorShape({groups,
outputChannels / groups,
inputChannels / groups,
filterHeight,
filterWidth});
else
filter = TensorShape({outputChannels,
inputChannels,
filterHeight,
filterWidth});
TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth};
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter),
ADD_TO);
test.run();
}
}
}
}
}
}
}
}
}
};
// ======Start Convolution TEST======
TEST(Forward, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
}
#ifndef PADDLE_ONLY_CPU
TEST(Forward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest, false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest, false);
}
TEST(BackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
kBackwardInputTest,
false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
kBackwardInputTest,
false);
}
TEST(BackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
kBackwardFilterTest,
false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
kBackwardFilterTest,
false);
}
#endif
// ======End Convolution TEST======
// ======Start DepthwiseConvolution TEST======
// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu
// version of depthwiseConv is implemented.
#ifndef PADDLE_ONLY_CPU
TEST(DepthwiseConvForward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest);
}
TEST(DepthwiseConvBackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU",
"DepthwiseConvGradInput-GPU",
kBackwardInputTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU",
"DepthwiseConvGradInput-GPU",
kBackwardInputTest);
}
TEST(DepthwiseConvBackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU",
"DepthwiseConvGradFilter-GPU",
kBackwardFilterTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU",
"DepthwiseConvGradFilter-GPU",
kBackwardFilterTest);
}
#endif
// ======End DepthwiseConvolution TEST======
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "FunctionTest.h"
namespace paddle {
template <DeviceType DType1, DeviceType DType2>
void forward(Compare2Function<DType1, DType2>& test,
const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
}
template <DeviceType DType1, DeviceType DType2>
void backward_input(Compare2Function<DType1, DType2>& test,
const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
}
template <DeviceType DType1, DeviceType DType2>
void backward_filter(Compare2Function<DType1, DType2>& test,
const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), ADD_TO);
test.run();
}
template <DeviceType DType1, DeviceType DType2>
using Function = void (*)(Compare2Function<DType1, DType2>& test,
const TensorShape& input,
const TensorShape& filter,
const TensorShape& output);
/**
* \brief A basic convolution function test interface.
*
* \param conv1 type name of convolution function 1.
* \param conv2 type name of convolution function 2.
* \param function test function, can be one of the forward, backward_input
* backward_filter function.
* Example:
* 1. Compare GemmConv's CPU and GPU implementation:
* Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
* "GemmConv-CPU", "GemmConv-GPU", forward);
*/
template <DeviceType DType1, DeviceType DType2>
void Convolution(const std::string& conv1,
const std::string& conv2,
Function<DType1, DType2> function) {
for (size_t batchSize : {1, 5}) {
for (size_t inputSize : {7, 14, 31}) {
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 16}) {
for (size_t outputChannels : {3, 16}) {
if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
function(test, input, filter, output);
}
}
}
}
}
}
}
}
/**
* \brief A convolution function test interface for
* image height is not equal image width.
*/
template <DeviceType DType1, DeviceType DType2>
void Convolution2(const std::string& conv1,
const std::string& conv2,
Function<DType1, DType2> function) {
for (size_t batchSize : {4}) {
for (size_t inputHeight : {7, 31}) {
for (size_t inputWidth : {10, 54}) {
for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) {
for (size_t outputChannels : {7}) {
size_t stride = 1;
size_t padding = 0;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputHeight
<< " inputWidth=" << inputWidth
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterHeight
<< " filterWidth=" << filterWidth
<< " outputHeight=" << outputHeight
<< " outputWidth=" << outputWidth
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
outputChannels, inputChannels, filterHeight, filterWidth};
TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth};
function(test, input, filter, output);
}
}
}
}
}
}
}
}
/**
* \brief A convolution function test interface for depthwise convolution.
*/
template <DeviceType DType1, DeviceType DType2>
void DepthwiseConvolution(const std::string& conv1,
const std::string& conv2,
Function<DType1, DType2> function) {
for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {3, 4}) {
for (size_t inputChannels : {32}) {
for (size_t outputChannels : {32, 64}) {
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
// NNPACK only supports stride = 1 if batchSize > 1,
// and there has some bug when batchSize > 1 and groups != 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1)
break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{groups,
outputChannels / groups,
inputChannels / groups,
filterSize,
filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
function(test, input, filter, output);
}
}
}
}
}
}
}
}
} // namespace paddle
...@@ -13,13 +13,25 @@ See the License for the specific language governing permissions and ...@@ -13,13 +13,25 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "ConvOpTest.h"
#include <paddle/framework/op_registry.h> namespace paddle {
USE_OP(mean); #ifndef PADDLE_ONLY_CPU
TEST(DepthwiseConv, Forward) {
DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "DepthwiseConv-GPU", forward);
}
TEST(DepthwiseConv, BackwardInput) {
DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConvGradInput-CPU", "DepthwiseConvGradInput-GPU", backward_input);
}
TEST(MeanOp, GetOpProto) { TEST(DepthwiseConv, BackwardFilter) {
auto& protos = paddle::framework::OpRegistry::protos(); DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
auto it = protos.find("mean"); "GemmConvGradFilter-CPU", "DepthwiseConvGradFilter-GPU", backward_filter);
ASSERT_NE(it, protos.end());
} }
#endif
} // namespace paddle
...@@ -93,8 +93,8 @@ TEST(Arguments, Matrix) { ...@@ -93,8 +93,8 @@ TEST(Arguments, Matrix) {
MatrixPtr matrix = Matrix::create(100, 200); MatrixPtr matrix = Matrix::create(100, 200);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2U); EXPECT_EQ(arg.shape().ndims(), 2U);
EXPECT_EQ(arg.shape()[0], 100); EXPECT_EQ(arg.shape()[0], 100U);
EXPECT_EQ(arg.shape()[1], 200); EXPECT_EQ(arg.shape()[1], 200U);
EXPECT_EQ(arg.data(), matrix->getData()); EXPECT_EQ(arg.data(), matrix->getData());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight()); EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
...@@ -112,8 +112,8 @@ TEST(Arguments, Matrix) { ...@@ -112,8 +112,8 @@ TEST(Arguments, Matrix) {
TEST(Arguments, Vector) { TEST(Arguments, Vector) {
VectorPtr vector = Vector::create(100, false); VectorPtr vector = Vector::create(100, false);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 1); EXPECT_EQ(arg.shape().ndims(), 1U);
EXPECT_EQ(arg.shape()[0], 100); EXPECT_EQ(arg.shape()[0], 100U);
EXPECT_EQ(arg.data(), vector->getData()); EXPECT_EQ(arg.data(), vector->getData());
CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>(); CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
...@@ -131,9 +131,9 @@ TEST(Arguments, Vector) { ...@@ -131,9 +131,9 @@ TEST(Arguments, Vector) {
TEST(Arguments, CpuSparseMatrix) { TEST(Arguments, CpuSparseMatrix) {
CpuSparseMatrix sparse(200, 300, 50); CpuSparseMatrix sparse(200, 300, 50);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2); EXPECT_EQ(arg.shape().ndims(), 2U);
EXPECT_EQ(arg.shape()[0], 200); EXPECT_EQ(arg.shape()[0], 200U);
EXPECT_EQ(arg.shape()[1], 300); EXPECT_EQ(arg.shape()[1], 300U);
EXPECT_EQ(arg.data(), sparse.getData()); EXPECT_EQ(arg.data(), sparse.getData());
// CHECK_EQ(arg.sparse().nnz(), 50); // CHECK_EQ(arg.sparse().nnz(), 50);
// CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
...@@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) { ...@@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) {
TEST(Arguments, BufferArg) { TEST(Arguments, BufferArg) {
BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 3); EXPECT_EQ(arg.shape().ndims(), 3U);
EXPECT_EQ(arg.shape()[0], 1); EXPECT_EQ(arg.shape()[0], 1U);
EXPECT_EQ(arg.shape()[1], 2); EXPECT_EQ(arg.shape()[1], 2U);
EXPECT_EQ(arg.shape()[2], 3); EXPECT_EQ(arg.shape()[2], 3U);
}; };
BufferArgs argments; BufferArgs argments;
......
...@@ -13,16 +13,38 @@ See the License for the specific language governing permissions and ...@@ -13,16 +13,38 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#define private public #include "ConvOpTest.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
USE_OP(add_two);
TEST(GemmConv, NaiveConv) {
TEST(AddOp, GetOpProto) { Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
auto& protos = paddle::framework::OpRegistry::protos(); "NaiveConv-CPU", "GemmConv-CPU", forward);
auto it = protos.find("add_two"); Convolution2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
ASSERT_NE(it, protos.end()); "NaiveConv-CPU", "GemmConv-CPU", forward);
auto& op_creators = paddle::framework::OpRegistry::op_creators();
auto it1 = op_creators.find("add_two_grad");
ASSERT_NE(it1, op_creators.end());
} }
#ifndef PADDLE_ONLY_CPU
TEST(GemmConv, Forward) {
Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "GemmConv-GPU", forward);
Convolution2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConv-CPU", "GemmConv-GPU", forward);
}
TEST(GemmConv, BackwardInput) {
Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input);
Convolution2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input);
}
TEST(GemmConv, BackwardFilter) {
Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter);
Convolution2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter);
}
#endif
} // namespace paddle
...@@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) { ...@@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) {
EXPECT_EQ(t.ndims(), 3U); EXPECT_EQ(t.ndims(), 3U);
EXPECT_EQ(t.getElements(), 6U); EXPECT_EQ(t.getElements(), 6U);
EXPECT_EQ(t[1], 2); EXPECT_EQ(t[1], 2U);
t.setDim(1, 100); t.setDim(1, 100);
EXPECT_EQ(t.getElements(), 300U); EXPECT_EQ(t.getElements(), 300U);
EXPECT_EQ(t[1], 100U); EXPECT_EQ(t[1], 100U);
......
...@@ -196,22 +196,23 @@ public: ...@@ -196,22 +196,23 @@ public:
CHECK_EQ(status, nnp_status_success); CHECK_EQ(status, nnp_status_success);
} }
} else { } else {
for (size_t g = 0; g < groups_; g++) {
// only supports stride = 1 // only supports stride = 1
CHECK_EQ(strideH(), 1); CHECK_EQ(strideH(), 1);
CHECK_EQ(strideW(), 1); CHECK_EQ(strideW(), 1);
nnp_status status =
nnp_convolution_output(algorithm_, // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1.
CHECK_EQ(groups_, static_cast<size_t>(1));
nnp_status status = nnp_convolution_output(algorithm_,
batchSize, batchSize,
inputChannels / groups_, inputChannels,
outputChannels / groups_, outputChannels,
inputSize, inputSize,
padding, padding,
kernelSize, kernelSize,
inputData + inputOffset * g, inputData,
filterData + filterOffset * g, filterData,
nullptr, /* bias */ nullptr, /* bias */
outputData + outputOffset * g, outputData,
bufferPtr, bufferPtr,
sizePtr, sizePtr,
nnp_activation_identity, nnp_activation_identity,
...@@ -221,7 +222,6 @@ public: ...@@ -221,7 +222,6 @@ public:
CHECK_EQ(status, nnp_status_success); CHECK_EQ(status, nnp_status_success);
} }
} }
}
static void create_nnpack_threadpool() { static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
......
...@@ -13,87 +13,18 @@ See the License for the specific language governing permissions and ...@@ -13,87 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/function/Function.h" #include "paddle/function/ConvOpTest.h"
#include "paddle/function/FunctionTest.h"
DEFINE_string(algo,
"auto",
"The algorithm (auto, ft8x8, ft16x16, wt8x8, "
"implicit-gemm, or direct) for computing convolution of NNPACK.");
namespace paddle { namespace paddle {
#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \ TEST(NNPACK, Forward) {
if (algo == "direct" && filterSize != 1) continue; \ Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
if (algo == "direct" && batchSize != 1) continue; \ "GemmConv-CPU", "NNPACKConv-CPU", forward);
if (algo == "wt8x8" && filterSize != 3) continue; \ }
if (algo == "implicit-gemm" && batchSize != 1) continue; \
if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue;
class ConvolutionTest {
public:
ConvolutionTest(const std::string& conv1,
const std::string& conv2,
std::string algo = "auto") {
for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64, 128}) {
if (inputChannels < outputChannels) break;
for (size_t stride : {1, 2}) {
// if batchSize > 1 NNPACKConv only supports stride = 1
if (batchSize > 1 && stride > 1) break;
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
IS_NNPACK_SUPPORT(algo, filterSize, stride);
LOG(INFO) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", algo));
TensorShape shape0{
batchSize, inputChannels, inputSize, inputSize};
TensorShape shape1{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape shape2{
batchSize, outputChannels, outputSize, outputSize};
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2));
test.run();
}
}
}
}
}
}
}
}
};
TEST(Convolution, NNPACK) { TEST(NNPACK, Depthwise) {
// NNPACK only supports stride = 1 DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); "GemmConv-CPU", "NNPACKConv-CPU", forward);
} }
} // namespace paddle } // namespace paddle
...@@ -23,6 +23,17 @@ endmacro() ...@@ -23,6 +23,17 @@ endmacro()
filter_test(GSERVER_HEADER) filter_test(GSERVER_HEADER)
filter_test(GSERVER_SOURCES) filter_test(GSERVER_SOURCES)
if(NOT WITH_MKLDNN)
file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h")
file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp")
list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER})
list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES})
message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations")
else()
message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations")
endif()
if(NOT WITH_GPU) if(NOT WITH_GPU)
list(REMOVE_ITEM GSERVER_HEADER list(REMOVE_ITEM GSERVER_HEADER
layers/CudnnConvBaseLayer.h layers/CudnnConvBaseLayer.h
......
...@@ -112,7 +112,6 @@ BEGIN_DEFINE_ACTIVATION(softmax) ...@@ -112,7 +112,6 @@ BEGIN_DEFINE_ACTIVATION(softmax)
private: private:
MatrixPtr sftMaxSum_; MatrixPtr sftMaxSum_;
MatrixPtr sftMaxDot_; MatrixPtr sftMaxDot_;
MatrixPtr one_;
public: public:
Error __must_check forward(Argument& act) { Error __must_check forward(Argument& act) {
...@@ -138,14 +137,6 @@ Error __must_check backward(Argument& act) { ...@@ -138,14 +137,6 @@ Error __must_check backward(Argument& act) {
1, 1,
/* trans */ false, /* trans */ false,
useGpu(act.deviceId)); useGpu(act.deviceId));
if (!one_ || one_->getWidth() != outputG->getWidth()) {
Matrix::resizeOrCreate(one_,
1,
outputG->getWidth(),
/* trans */ false,
useGpu(act.deviceId));
one_->one();
}
sftMaxDot_->dotMul(*outputG, *outputV); sftMaxDot_->dotMul(*outputG, *outputV);
sftMaxSum_->colMerge(*sftMaxDot_); sftMaxSum_->colMerge(*sftMaxDot_);
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "mkldnn.hpp"
namespace paddle {
typedef enum {
MKLDNN_BASE = 1, // basical info of MKLDNN
MKLDNN_TESTS = 1, // gtest info of MKLDNN
MKLDNN_SIZES = 2, // size info of MKLDNN
MKLDNN_FMTS = 3, // format info of MKLDNN
MKLDNN_ALL = 4, // show all info of MKLDNN
} MKLDNN_LOG_LEVEL;
/**
* @brief MKLDNN CPU engine.
*
*/
class CPUEngine {
public:
static CPUEngine& Instance() {
// Thread-safe in C++11.
static CPUEngine myInstance;
return myInstance;
}
// Disallow copy or move
CPUEngine(const CPUEngine&) = delete; // Copy constructor
CPUEngine(CPUEngine&&) = delete; // Move constructor
CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment
CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment
mkldnn::engine& getEngine() { return cpuEngine_; }
protected:
CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {}
// CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {}
~CPUEngine() {}
private:
mkldnn::engine cpuEngine_;
};
/**
* @brief MKLDNN Stream.
*
*/
class MKLDNNStream {
public:
MKLDNNStream() : ready_(false) { resetState(); }
virtual ~MKLDNNStream() {}
/**
* @brief Submit stream
* @param prims The primitives vector
* @param block Waiting for the stream to complete
*/
void submit(std::vector<mkldnn::primitive>& prims, bool block = true) {
resetState();
stream_->submit(prims).wait(block);
ready_ = false;
}
/**
* @brief Reset the mkldnn stream
*/
void resetState() {
if (ready_) {
return;
}
// TODO(TJ): change me when mkldnn have method to reset this state
// stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy));
stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager));
ready_ = true;
}
private:
bool ready_;
std::shared_ptr<mkldnn::stream> stream_;
};
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
using namespace mkldnn; // NOLINT
typedef memory::format format;
typedef inner_product_forward fc_fwd;
typedef inner_product_backward_weights fc_bwdWgt;
typedef inner_product_backward_data fc_bwdData;
namespace paddle {
REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer);
bool MKLDNNFcLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
return false;
}
CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet";
CHECK_EQ(inputLayers_.size(), parameters_.size());
CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet";
// output size, cat not be changed
oc_ = getSize();
oh_ = 1;
ow_ = 1;
// input size can not change in FC
iLayerSize_ = inputLayers_[0]->getSize();
CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_);
// create weight
weight_ =
std::unique_ptr<Weight>(new Weight(oc_, iLayerSize_, parameters_[0], 0));
// create biases
if (biasParameter_.get() != NULL) {
biases_ = std::unique_ptr<Weight>(new Weight(1, oc_, biasParameter_));
}
return true;
}
void MKLDNNFcLayer::convertWeightsFromPaddle() {
if (FLAGS_use_mkldnn_wgt) {
return;
}
if (hasInitedWgt_) {
return;
}
// The weight_ is transposed from initial paddle weight
MatrixPtr paddleWgt = Matrix::create(
weight_->getW()->getData(), iLayerSize_, oc_, false, false);
// TODO(TJ): remove this print when do not need differ weights
std::ostringstream ostr;
paddleWgt->print(ostr);
VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str();
// The mkldnn weight is transposed from initial paddle matrix
MatrixPtr paddleWgtT;
paddleWgt->transpose(paddleWgtT, true);
weight_->getW()->copyFrom(*paddleWgtT);
hasInitedWgt_ = true;
}
void MKLDNNFcLayer::convertWeightsToPaddle() {
MatrixPtr dnnWgt = weight_->getW();
MatrixPtr paddleWgt;
dnnWgt->transpose(paddleWgt, true);
// copy paddle weight and override on weight_
MatrixPtr dnnWgtT = Matrix::create(
dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false);
dnnWgtT->copyFrom(*paddleWgt);
}
void MKLDNNFcLayer::reshape() {
const Argument& input = getInput(0);
int batchSize = input.getBatchSize();
if (bs_ == batchSize) {
return;
}
bs_ = batchSize;
ih_ = input.getFrameHeight();
iw_ = input.getFrameWidth();
if (ih_ == 0) {
ih_ = 1;
}
if (iw_ == 0) {
iw_ = 1;
}
hasSpatial_ = true;
if (ih_ == 1 && iw_ == 1) {
hasSpatial_ = false;
}
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
ic_ = iLayerSize_ / (ih_ * iw_);
CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
CHECK_EQ(size_t(oc_), getSize());
printSizeInfo();
// reset output
output_.setFrameHeight(oh_);
output_.setFrameWidth(ow_);
resetOutput(bs_, oc_);
// reset mkldnn forward
resetFwd();
needResetBwd_ = true;
convertWeightsFromPaddle();
}
void MKLDNNFcLayer::resetFwd() {
bool hasBias = biases_ && biases_->getW();
real* iData = getInputValue(0)->getData();
real* oData = getOutputValue()->getData();
real* wData = weight_->getW()->getData();
real* bData = hasBias ? biases_->getW()->getData() : NULL;
// TODO(TJ): below create should be covered in MkldnnMatrix
// create memory desc
memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
: createMD({bs_, ic_}, format::nc);
memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
: createMD({oc_, ic_}, format::oi);
memory::desc bMD = bData != NULL ? createMD({oc_}, format::x)
: createMD({}, format::format_undef);
memory::desc oMD = createMD({bs_, oc_}, format::nc);
// create memory primitive desc and memory self
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData));
outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData));
prop_kind pk = prop_kind::forward;
fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD)
: fc_fwd::desc(pk, iMD, wMD, oMD);
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
if (bData != NULL) {
biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData));
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
} else {
fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
}
pipelineFwd_.clear();
pipelineFwd_.push_back(*fwd_);
}
void MKLDNNFcLayer::resetBwd() {
if (!needResetBwd_) {
return;
}
needResetBwd_ = false;
bool hasBias = biases_ && biases_->getWGrad();
real* iData = getInputValue(0)->getData();
real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL;
real* oDiff = getOutputGrad()->getData();
real* wDiff = weight_->getWGrad()->getData();
real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL;
/// backward weight
// create memory desc for backward memory
memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw)
: createMD({bs_, ic_}, format::nc);
memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw)
: createMD({oc_, ic_}, format::oi);
memory::desc oMD = createMD({bs_, oc_}, format::nc);
memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x)
: createMD({}, format::format_undef);
if (inVal_) {
// update data
inVal_->set_data_handle(iData);
} else {
inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData));
}
// create memory primitive desc and memory self
wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff));
outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff));
fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD);
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL
? fc_bwdWgt::desc(iMD, wMD, bMD, oMD)
: fc_bwdWgt::desc(iMD, wMD, oMD);
fc_bwdWgt::primitive_desc bwdWgtPD =
fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD);
if (bDiff != NULL) {
biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff));
bwdWgt_.reset(
new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_));
} else {
bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_));
}
pipelineBwd_.clear();
pipelineBwd_.push_back(*bwdWgt_);
/// backward data
if (iDiff == NULL) {
return;
}
fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD);
fc_bwdData::primitive_desc bwdDataPD =
fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD);
inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff));
CHECK(wgtVal_) << "Should have weight memory";
bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_));
pipelineBwd_.push_back(*bwdData_);
}
void MKLDNNFcLayer::forward(PassType passType) {
Layer::forward(passType);
reshape();
{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
// update input data
// since it might be changed if this is after data layer
real* iData = getInputValue(0)->getData();
inVal_->set_data_handle(iData);
// just submit forward pipeline
stream_->submit(pipelineFwd_);
}
/* activation */ {
REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
forwardActivation();
}
}
void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
}
{
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
resetBwd();
// update diff
real* oDiff = getOutputGrad()->getData();
outGrad_->set_data_handle(oDiff);
// just sumbmit backward pipeline
stream_->submit(pipelineBwd_);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
}
}
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "MKLDNNLayer.h"
#include "mkldnn.hpp"
namespace paddle {
/**
* @brief A subclass of MKLDNNLayer fc layer.
*
* The config file api is mkldnn_fc
*/
class MKLDNNFcLayer : public MKLDNNLayer {
protected:
// input layer size, can not be change after init
size_t iLayerSize_; // == ic * ih * iw
// if has already init the weight
bool hasInitedWgt_;
// if input layer has image size info (ih>1 && iw>1)
bool hasSpatial_;
// fc weight and bias
std::unique_ptr<Weight> weight_;
std::unique_ptr<Weight> biases_;
public:
explicit MKLDNNFcLayer(const LayerConfig& config)
: MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {}
~MKLDNNFcLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void convertWeightsFromPaddle() override;
void convertWeightsToPaddle() override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
protected:
/**
* reshape the input image sizes
* and reset output buffer size
* and reset mkldnn forward
*/
void reshape();
/**
* reset the forward primitve and memory
* only would be called when input size changes
*/
void resetFwd();
/**
* reset the backward primitve and memory for mkldnn fc
* only would be called when needed
*/
void resetBwd();
};
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Layer.h"
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
namespace paddle {
class MKLDNNLayer;
typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
/**
* @brief Base class of MKLDNNlayer.
*
*/
class MKLDNNLayer : public Layer {
protected:
// batch size
int bs_;
// input image channel, height and width
int ic_, ih_, iw_;
// output image channel, height and width
int oc_, oh_, ow_;
// backward also need reset after reset forward handle
bool needResetBwd_;
// mkldnn engine, stream and primivtives
mkldnn::engine engine_;
std::shared_ptr<MKLDNNStream> stream_;
std::shared_ptr<mkldnn::primitive> fwd_;
std::shared_ptr<mkldnn::primitive> bwdWgt_;
std::shared_ptr<mkldnn::primitive> bwdData_;
std::vector<mkldnn::primitive> pipelineFwd_;
std::vector<mkldnn::primitive> pipelineBwd_;
// TODO(TJ): change below memory as MKLDNNMatrixPtr type
std::shared_ptr<mkldnn::memory> inVal_;
std::shared_ptr<mkldnn::memory> inGrad_;
std::shared_ptr<mkldnn::memory> outVal_;
std::shared_ptr<mkldnn::memory> outGrad_;
std::shared_ptr<mkldnn::memory> wgtVal_;
std::shared_ptr<mkldnn::memory> wgtGrad_;
std::shared_ptr<mkldnn::memory> biasVal_;
std::shared_ptr<mkldnn::memory> biasGrad_;
public:
explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config),
bs_(0),
ic_(0),
ih_(0),
iw_(0),
oc_(0),
oh_(0),
ow_(0),
needResetBwd_(true),
engine_(mkldnn::engine::cpu, 0),
stream_(nullptr),
fwd_(nullptr),
bwdWgt_(nullptr),
bwdData_(nullptr) {}
~MKLDNNLayer() {}
virtual bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
if (!Layer::init(layerMap, parameterMap)) {
return false;
}
CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn."
<< "Please set WITH_MKLDNN=ON "
<< "and set use_mkldnn=True";
stream_.reset(new MKLDNNStream());
engine_ = CPUEngine::Instance().getEngine();
// TODO(TJ): deivecId
return true;
}
/**
* convert weight from paddle format to mkldnn format
* weight_ will be override
*/
virtual void convertWeightsFromPaddle() {}
/**
* convert mkldnn weight to paddle format
* weight_ will be override
*/
virtual void convertWeightsToPaddle() {}
/**
* print info about sizes
*/
virtual void printSizeInfo() {
VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_
<< ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_
<< ", oh: " << oh_ << ", ow: " << ow_;
}
// TODO(TJ): move to MkldnnMatrix
// create memory desc
inline mkldnn::memory::desc createMD(
mkldnn::memory::dims dims,
mkldnn::memory::format fmt,
mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) {
// TODO(TJ): isFmtSuppoted(fmt)
return mkldnn::memory::desc(dims, type, fmt);
}
};
} // namespace paddle
...@@ -96,7 +96,7 @@ void SubNestedSequenceLayer::calSelectedCols( ...@@ -96,7 +96,7 @@ void SubNestedSequenceLayer::calSelectedCols(
for (size_t i = 0; i < seqNum; ++i) { for (size_t i = 0; i < seqNum; ++i) {
for (size_t j = 0; j < beamSize; ++j) { for (size_t j = 0; j < beamSize; ++j) {
if (selectedIndices->getElement(i, j) == -1.) break; if (selectedIndices->getElement(i, j) == -1.) break;
int selSubSeqIdx = selectedIndices->getElement(i, j); size_t selSubSeqIdx = selectedIndices->getElement(i, j);
CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
...@@ -135,7 +135,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { ...@@ -135,7 +135,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
<< "must be a nested sequence."; << "must be a nested sequence.";
const MatrixPtr selectedIndices = getInputValue(1); const MatrixPtr selectedIndices = getInputValue(1);
CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight()); CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight());
if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) { if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
/* /*
......
...@@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider ...@@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider
# mkdir will get error. # mkdir will get error.
add_test(NAME test_ProtoDataProvider add_test(NAME test_ProtoDataProvider
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
################# test_LayerGrad ####################### ################# test_LayerGrad #######################
add_unittest_without_exec(test_LayerGrad add_unittest_without_exec(test_LayerGrad
...@@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad ...@@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad add_test(NAME test_LayerGrad
COMMAND test_LayerGrad) COMMAND test_LayerGrad)
########## test_Mkldnn layers and activations ##########
if(WITH_MKLDNN)
add_unittest_without_exec(test_MKLDNN
test_MKLDNN.cpp
MKLDNNTester.cpp
LayerGradUtil.cpp)
add_test(NAME test_MKLDNN COMMAND test_MKLDNN)
endif()
################ test_CRFLayerGrad #################### ################ test_CRFLayerGrad ####################
add_unittest_without_exec(test_CRFLayerGrad add_unittest_without_exec(test_CRFLayerGrad
test_CRFLayerGrad.cpp test_CRFLayerGrad.cpp
...@@ -92,8 +101,8 @@ if(WITH_PYTHON) ...@@ -92,8 +101,8 @@ if(WITH_PYTHON)
test_PyDataProvider.cpp) test_PyDataProvider.cpp)
add_test(NAME test_PyDataProvider add_test(NAME test_PyDataProvider
COMMAND .set_python_path.sh -d ./gserver/tests:${PROJ_ROOT}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider COMMAND .set_python_path.sh -d ./gserver/tests:${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif() endif()
############### test_RecurrentLayer ####################### ############### test_RecurrentLayer #######################
...@@ -106,7 +115,7 @@ if(NOT WITH_DOUBLE) ...@@ -106,7 +115,7 @@ if(NOT WITH_DOUBLE)
add_test(NAME test_WarpCTCLayer add_test(NAME test_WarpCTCLayer
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR}
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif() endif()
############### test_RecurrentGradientMachine ############### ############### test_RecurrentGradientMachine ###############
...@@ -116,20 +125,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine ...@@ -116,20 +125,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine
test_RecurrentGradientMachine.cpp) test_RecurrentGradientMachine.cpp)
add_test(NAME test_RecurrentGradientMachine add_test(NAME test_RecurrentGradientMachine
COMMAND .set_python_path.sh -d COMMAND .set_python_path.sh -d
${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
add_unittest_without_exec(test_NetworkCompare add_unittest_without_exec(test_NetworkCompare
test_NetworkCompare.cpp) test_NetworkCompare.cpp)
if(WITH_GPU) if(WITH_GPU)
add_test(NAME test_NetworkCompare add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
else() else()
add_test(NAME test_NetworkCompare add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false
WORKING_DIRECTORY ${PROJ_ROOT}/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif() endif()
...@@ -137,6 +146,6 @@ add_unittest_without_exec(test_PyDataProvider2 ...@@ -137,6 +146,6 @@ add_unittest_without_exec(test_PyDataProvider2
test_PyDataProvider2.cpp) test_PyDataProvider2.cpp)
add_test(NAME test_PyDataProvider2 add_test(NAME test_PyDataProvider2
COMMAND .set_python_path.sh -d ${PROJ_ROOT}/paddle/gserver/tests:${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle
) )
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNTester.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
#include "paddle/gserver/layers/MKLDNNLayer.h"
namespace paddle {
// init data layer and test layer of both dnn and reference
void MKLDNNTester::reset(const TestConfig& dnn,
const TestConfig& ref,
size_t batchSize) {
const bool trans = false;
const bool useGpu = false;
// clear
configs_.clear();
layerNames_.clear();
dataLayers_.clear();
datas_.clear();
layerMaps_.clear();
parameters_.clear();
testLayers_.clear();
// resize
configs_.resize(NUM);
layerNames_.resize(NUM);
dataLayers_.resize(NUM);
datas_.resize(NUM);
layerMaps_.resize(NUM);
parameters_.resize(NUM);
testLayers_.resize(NUM);
// reset configs and layer names
configs_[DNN] = dnn;
configs_[REF] = ref;
layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer
layerNames_[REF] = "reference"; // second is reference layer
// reset others
for (size_t i = 0; i < NUM; ++i) {
configs_[i].layerConfig.set_name(layerNames_[i]);
initDataLayer(configs_[i],
&(dataLayers_[i]),
&(datas_[i]),
&(layerMaps_[i]),
layerNames_[i],
batchSize,
trans,
useGpu);
initTestLayer(
configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
}
dnnLayer_ = testLayers_[DNN];
refLayer_ = testLayers_[REF];
EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
setInputImgSize();
}
void MKLDNNTester::setInputImgSize() {
for (size_t n = 0; n < dataLayers_.size(); ++n) {
for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
// TODO(TJ): fix me when concat and elewise ready
dataLayers_[n][i]->getOutput().setFrameHeight(ih_);
dataLayers_[n][i]->getOutput().setFrameWidth(iw_);
}
}
}
// init randome parameters of ref, and copy to mkldnn
void MKLDNNTester::randomWgtDatas() {
EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
for (size_t i = 0; i < parameters_[REF].size(); ++i) {
const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
parameters_[REF][i]->randomize();
dnnValue->copyFrom(*refValue);
VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName();
printVector(dnnValue);
}
}
// random botdata of ref layer and copy same to mkldnn
void MKLDNNTester::randomBotDatas() {
CHECK_EQ(dataLayers_.size(), NUM);
for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
dataLayers_[REF][i]->getOutputValue()->randomizeUniform();
dataLayers_[DNN][i]->getOutputValue()->copyFrom(
*(dataLayers_[REF][i]->getOutputValue()));
VLOG(lvl_) << "Input " << i << " data:";
printMatrix(dataLayers_[REF][i]->getOutputValue());
}
}
void MKLDNNTester::randomTopDiffs() {
refLayer_->getOutputGrad()->randomizeUniform();
dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad()));
VLOG(lvl_) << "Random dom Backward Input, TopDiff: ";
printMatrix(refLayer_->getOutputGrad());
}
void MKLDNNTester::checkForward() {
printTopDatas();
double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
testLayers_[REF]->getOutputValue());
VLOG(MKLDNN_ALL) << "Check Forward";
EXPECT_LE(fabs(delta), eps_);
}
void MKLDNNTester::checkBackwardData() {
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad();
const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad();
VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i;
printMatrix(dnnDiff);
VLOG(lvl_) << "Reference Backward Output BotDiff " << i;
printMatrix(refDiff);
double delta = compareMatrix(dnnDiff, refDiff);
EXPECT_LE(fabs(delta), eps_);
// TODO(TJ): uncomment me when batch norm ready
// if (isBN) {
// // the other two inputs in batch norm are for moving mean and var
// break;
// }
}
}
void MKLDNNTester::checkBackwardWgts() {
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts);
const MKLDNNLayerPtr dnnlayer =
std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
CHECK(dnnlayer);
dnnlayer->convertWeightsToPaddle();
for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName();
printVector(dnn);
VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName();
printVector(ref);
double delta = compareVector(dnn, ref);
EXPECT_LE(fabs(delta), eps_);
}
VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre";
restoreWgt(dnnWgts, parameters_[DNN]);
}
void MKLDNNTester::saveWgt(const vector<ParameterPtr>& from,
vector<VectorPtr>& to) {
const bool useGpu = false;
to.resize(from.size());
for (size_t i = 0; i < to.size(); ++i) {
const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE);
to[i] = Vector::create(wgt->getSize(), useGpu);
to[i]->copyFrom(*wgt);
}
}
void MKLDNNTester::restoreWgt(const vector<VectorPtr>& from,
vector<ParameterPtr>& to) {
CHECK_EQ(from.size(), to.size());
for (size_t i = 0; i < from.size(); ++i) {
const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE);
wgt->copyFrom(*from[i]);
}
}
// clear parameters grad
void MKLDNNTester::clearWgtDiffs() {
for (size_t n = 0; n < parameters_.size(); ++n) {
for (size_t i = 0; i < parameters_[n].size(); ++i) {
const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT);
if (grad) {
grad->zeroMem();
}
}
}
}
void MKLDNNTester::clearBotDiffs() {
// dnn and ref
for (size_t n = 0; n < dataLayers_.size(); ++n) {
// all inputs layers
for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
dataLayers_[n][i]->getOutputGrad()->zeroMem();
}
}
}
void MKLDNNTester::clearBotDiffs(int n) {
CHECK_LT(n, NUM);
// all inputs layers
for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
dataLayers_[n][i]->getOutputGrad()->zeroMem();
}
}
void MKLDNNTester::clearTopDatas() {
for (size_t i = 0; i < testLayers_.size(); ++i) {
testLayers_[i]->getOutputValue()->zeroMem();
}
}
void MKLDNNTester::printTopDatas() {
if (!log_) {
return;
}
for (int n = 0; n < NUM; ++n) {
VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: ";
printMatrix(testLayers_[n]->getOutputValue());
}
}
void MKLDNNTester::printMatrix(const MatrixPtr& m) {
if (!log_) {
return;
}
std::ostringstream ostr;
m->print(ostr);
VLOG(lvl_) << std::endl << ostr.str();
}
void MKLDNNTester::printVector(const VectorPtr& v) {
if (!log_) {
return;
}
std::ostringstream ostr;
v->print(ostr, v->getSize());
VLOG(lvl_) << std::endl << ostr.str();
}
double MKLDNNTester::getDelta(const real* d1,
const real* d2,
size_t len,
const float failRate,
const float thres) {
double delta = 0, sum = 0;
int failCnt = 0;
const double eps = 1e-5;
double maxOut = 0;
for (size_t i = 0; i < len; ++i) {
double ref = fabs(d2[i]);
double diff = fabs(d1[i] - d2[i]);
delta += diff;
sum += ref;
if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) {
maxOut = std::max(maxOut, diff / ref);
failCnt++;
}
}
EXPECT_TRUE(std::isnormal(sum));
EXPECT_FALSE(std::isinf(sum));
EXPECT_FALSE(std::isnan(delta));
VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len
<< ", delta: " << delta / sum << ", failCnt:" << failCnt;
return (failCnt / (float)len) > failRate ? maxOut : delta / sum;
}
double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) {
CHECK_EQ(m1->getElementCnt(), m2->getElementCnt());
return getDelta(m1->getData(), m2->getData(), m1->getElementCnt());
}
double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) {
CHECK_EQ(v1->getSize(), v2->getSize());
return getDelta(v1->getData(), v2->getData(), v1->getSize());
}
void MKLDNNTester::runOnce() {
// test forward
randomBotDatas();
dnnLayer_->forward(PASS_TRAIN);
refLayer_->forward(PASS_TRAIN);
checkForward();
// test backward
randomTopDiffs();
dnnLayer_->backward(nullptr);
refLayer_->backward(nullptr);
checkBackwardData();
checkBackwardWgts();
// clear buffers
// ref code will addto the diff, dnn code will writeto it
// and clearTopDatas() and clearWgtDiffs() should be coverd by test layers
clearBotDiffs(REF);
}
void MKLDNNTester::run(const TestConfig& dnn,
const TestConfig& ref,
size_t batchSize,
size_t inputImgH,
size_t inputImgW,
size_t iter,
float epsilon,
bool log,
int level) {
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
<< " vs " << ref.layerConfig.type();
ih_ = inputImgH;
iw_ = inputImgW;
iter_ = iter;
eps_ = epsilon;
log_ = log;
lvl_ = level;
// Firstly test FLAGS_use_mkldnn_wgt = false
FLAGS_use_mkldnn_wgt = false;
// reset and run once
reset(dnn, ref, batchSize);
randomWgtDatas();
clearWgtDiffs();
clearBotDiffs();
for (size_t i = 0; i < iter_; ++i) {
VLOG(MKLDNN_TESTS) << "Check Iteration " << i;
runOnce();
}
// Then test FLAGS_use_mkldnn_wgt = true
FLAGS_use_mkldnn_wgt = true;
// after run once the mkldnn weight has been stored in dnnlayer
// then save the weights and restart again
vector<VectorPtr> dnnWgts, refWgts;
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
saveWgt(parameters_[DNN], dnnWgts);
saveWgt(parameters_[REF], refWgts);
// restart again with flag true
reset(dnn, ref, batchSize);
// restore wgt
restoreWgt(dnnWgts, parameters_[DNN]);
restoreWgt(refWgts, parameters_[REF]);
clearWgtDiffs();
clearBotDiffs();
for (size_t i = 0; i < iter_; ++i) {
VLOG(MKLDNN_TESTS) << "Check Iteration " << i;
runOnce();
}
}
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
namespace paddle {
/**
* @brief test the functionality of Mkldnnlayers
* refer to paddle original function
*/
class MKLDNNTester {
enum {
DNN = 0, // MKLDNN layer
REF = 1, // Reference layer
NUM = 2, // Number of total
};
protected:
std::vector<TestConfig> configs_;
vector<string> layerNames_;
vector<vector<DataLayerPtr>> dataLayers_;
vector<vector<Argument>> datas_;
vector<LayerMap> layerMaps_;
vector<vector<ParameterPtr>> parameters_;
vector<LayerPtr> testLayers_;
LayerPtr dnnLayer_, refLayer_;
/// run some iterations, all the result should pass
size_t iter_;
/// whether to print out the details
bool log_;
/// vlog level to print the matrix details datas
int lvl_;
/// epsilon
float eps_;
/// input image size, default 1
size_t ih_, iw_;
public:
explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) {
iter_ = iter;
eps_ = epsilon;
log_ = false;
lvl_ = MKLDNN_ALL;
}
~MKLDNNTester() {}
public:
void run(const TestConfig& dnn,
const TestConfig& ref,
size_t batchSize,
size_t inputImgH = 1,
size_t inputImgW = 1,
size_t iter = 3,
float epsilon = 1e-4,
bool log = false,
int level = MKLDNN_ALL);
void setLogLevel(int lvl) { lvl_ = lvl; }
private:
void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize);
void setInputImgSize();
void runOnce();
void randomWgtDatas();
void randomBotDatas();
void randomTopDiffs();
void checkForward();
void checkBackwardData();
void checkBackwardWgts();
void clearWgtDiffs();
void clearBotDiffs();
void clearBotDiffs(int n); // clear specific layer
void clearTopDatas();
void printTopDatas();
void printMatrix(const MatrixPtr& m);
void printVector(const VectorPtr& v);
void saveWgt(const vector<ParameterPtr>& from, vector<VectorPtr>& to);
void restoreWgt(const vector<VectorPtr>& from, vector<ParameterPtr>& to);
double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2);
double compareVector(const VectorPtr& v1, const VectorPtr& v2);
/**
* Get delta percent
* if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the
* max(diff/ref)
* else return sum(abs(a-b)) / sum(abs(b))
* The return value should smaller than eps when passing.
*/
double getDelta(const real* d1,
const real* d2,
size_t len,
const float failRate = 1e-3,
const float thres = 0.1);
};
} // namespace paddle
...@@ -88,7 +88,7 @@ void checkLayerOut(vector<vector<int>> groundTruth, ...@@ -88,7 +88,7 @@ void checkLayerOut(vector<vector<int>> groundTruth,
TEST(Layer, kmaxSeqScoreLayer) { TEST(Layer, kmaxSeqScoreLayer) {
const size_t maxBeamSize = 100; const size_t maxBeamSize = 100;
int beamSize = 1 + (rand() % maxBeamSize); size_t beamSize = 1 + (rand() % maxBeamSize);
vector<int> seqStartPosition; vector<int> seqStartPosition;
vector<int> subSeqStartPosition; vector<int> subSeqStartPosition;
...@@ -96,6 +96,11 @@ TEST(Layer, kmaxSeqScoreLayer) { ...@@ -96,6 +96,11 @@ TEST(Layer, kmaxSeqScoreLayer) {
MatrixPtr inValue = MatrixPtr inValue =
Matrix::create(subSeqStartPosition.back(), 1, false, false); Matrix::create(subSeqStartPosition.back(), 1, false, false);
std::vector<bool> mode = {false};
#ifndef PADDLE_ONLY_CPU
mode.push_back(true);
#endif
for (auto hasSubseq : {false, true}) { for (auto hasSubseq : {false, true}) {
vector<vector<int>> groundTruth; vector<vector<int>> groundTruth;
inValue->randomizeUniform(); inValue->randomizeUniform();
...@@ -104,7 +109,7 @@ TEST(Layer, kmaxSeqScoreLayer) { ...@@ -104,7 +109,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
hasSubseq ? subSeqStartPosition : seqStartPosition, hasSubseq ? subSeqStartPosition : seqStartPosition,
beamSize); beamSize);
for (auto useGpu : {false, true}) { for (auto useGpu : mode) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("kmax_seq_score"); config.layerConfig.set_type("kmax_seq_score");
config.layerConfig.set_beam_size(beamSize); config.layerConfig.set_beam_size(beamSize);
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "MKLDNNTester.h"
#include "ModelConfig.pb.h"
using namespace paddle; // NOLINT
DECLARE_bool(thread_local_rand_use_global_seed);
DECLARE_bool(use_gpu);
DECLARE_bool(use_mkldnn);
struct testFCDesc {
int bs;
int ic;
int oc;
int ih, iw; // oh == ow == 1
};
void testFcLayer(const testFCDesc& pm) {
const std::string compareTypes[] = {"mkldnn_fc", "fc"};
TestConfig cfg;
cfg.layerConfig.set_type(compareTypes[0]);
cfg.layerConfig.set_size(pm.oc);
cfg.inputDefs.push_back(
{INPUT_DATA,
"layer_0",
/* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw),
/* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)});
cfg.layerConfig.add_inputs();
MKLDNNTester tester;
for (auto biasSize : {pm.oc, 0}) {
cfg.biasSize = biasSize;
TestConfig ref = cfg;
ref.layerConfig.set_type(compareTypes[1]);
for (auto bs : {pm.bs, 1}) {
tester.run(cfg, ref, bs, pm.ih, pm.iw);
}
}
}
TEST(MKLDNNLayer, FcLayer) {
testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1});
testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1});
testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13});
testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11});
testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16});
testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16});
}
// TODO(TJ): add branch test
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
FLAGS_use_gpu = false;
FLAGS_use_mkldnn = true;
initMain(argc, argv);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
return RUN_ALL_TESTS();
}
...@@ -15,13 +15,13 @@ ...@@ -15,13 +15,13 @@
file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_HEADERS . *.h)
file(GLOB MATH_SOURCES . *.cpp) file(GLOB MATH_SOURCES . *.cpp)
set(MATH_SOURCES set(MATH_SOURCES
"${PROJ_ROOT}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
"${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"
${MATH_SOURCES}) ${MATH_SOURCES})
if(NOT WITH_GPU) if(NOT WITH_GPU)
# then compile BaseMatrix.cu as c++ file # then compile BaseMatrix.cu as c++ file
compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/BaseMatrix.cu") compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu")
compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu") compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu")
add_library(paddle_math STATIC add_library(paddle_math STATIC
${MATH_SOURCES}) ${MATH_SOURCES})
else() else()
......
...@@ -302,6 +302,10 @@ public: ...@@ -302,6 +302,10 @@ public:
bool isSparse() const { return true; } bool isSparse() const { return true; }
private: private:
using Matrix::mul;
using Matrix::copyFrom; using Matrix::copyFrom;
using Matrix::rowMax;
using Matrix::print;
using Matrix::subMatrix;
}; };
} // namespace paddle } // namespace paddle
...@@ -231,6 +231,9 @@ public: ...@@ -231,6 +231,9 @@ public:
private: private:
using Matrix::mul; using Matrix::mul;
using Matrix::copyFrom; using Matrix::copyFrom;
using Matrix::rowMax;
using Matrix::print;
using Matrix::subMatrix;
}; };
} // namespace paddle } // namespace paddle
...@@ -49,25 +49,20 @@ cc_library(net_op SRCS net_op.cc DEPS op_registry) ...@@ -49,25 +49,20 @@ cc_library(net_op SRCS net_op.cc DEPS op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
op_library(add_op SRCS add_op.cc add_op.cu) op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mean_op SRCS mean_op.cc mean_op.cu) op_library(mean_op SRCS mean_op.cc mean_op.cu)
cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu)
op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op)
op_library(fc_op
SRCS fc_op.cc
DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op)
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS op_desc tensor op_registry operator net_op) DEPS op_desc tensor op_registry operator net_op)
cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename T>
class GaussianRandomKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.op_.GetAttr<float>("mean");
float std = context.op_.GetAttr<float>("std");
auto* tensor = context.Output<framework::Tensor>(0);
T* data = tensor->mutable_data<T>(context.GetPlace());
// TODO(dzh): attribute does not support unsigned int.
// And we need a global random seed configuration.
int seed = context.op_.GetAttr<int>("seed");
if (seed == 0) {
seed = std::random_device()();
}
std::mt19937 g(seed);
std::normal_distribution<T> distribution(mean, std);
ssize_t size = framework::product(tensor->dims());
for (int i = 0; i < size; ++i) {
data[i] = distribution(g);
}
}
};
class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext& context) const override {
auto* tensor = context.Output<framework::Tensor>(0);
auto dims = GetAttr<std::vector<int>>("dims");
PADDLE_ENFORCE(dims.size() > 0UL,
"dims can be one int or array. dims must be set.");
tensor->Resize(framework::make_ddim(dims));
}
};
class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GaussianRandomOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "output matrix of random op");
AddComment(R"DOC(
GaussianRandom operator.
Use to initialize tensor with gaussian random generator.
)DOC");
AddAttr<std::vector<int>>("dims", "The dimension of random tensor.");
AddAttr<float>("mean", "mean value of random.").SetDefault(.0f);
AddAttr<float>("std", "minimum value of random value.").SetDefault(1.0f);
AddAttr<int>("seed",
"Random seed of generator."
"0 means use system wide seed")
.SetDefault(0);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker);
REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
...@@ -12,65 +12,42 @@ ...@@ -12,65 +12,42 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/net_op.h" #include <memory>
#include <random>
#include "paddle/platform/dynload/curand.h"
#include "paddle/platform/gpu_info.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using OpRegistry = framework::OpRegistry; template <typename T>
class GaussianRandomKernel : public framework::OpKernel {
class FullyConnectedOp : public NetOp {
public: public:
void Init() override { void Compute(const framework::ExecutionContext& context) const override {
AddOp(OpRegistry::CreateOp("mul", float mean = context.op_.GetAttr<float>("mean");
{ float std = context.op_.GetAttr<float>("std");
Input("X"), Input("W"), auto* tensor = context.Output<framework::Tensor>(0);
}, T* data = tensor->mutable_data<T>(context.GetPlace());
{Output("before_act")}, {}));
auto b = Input("b"); int seed = context.op_.GetAttr<int>("seed");
if (b != framework::kEmptyVarName) { if (seed == 0) {
AddOp(OpRegistry::CreateOp("rowwise_add", std::random_device rd;
{Output("before_act"), Input("b")}, seed = rd();
{Output("before_act")}, {}));
} }
curandGenerator_t g;
auto activation = GetAttr<std::string>("activation"); PADDLE_ENFORCE(platform::dynload::curandCreateGenerator(
AddOp(OpRegistry::CreateOp(activation, {Output("before_act")}, &g, CURAND_RNG_PSEUDO_DEFAULT));
{Output("Y")}, {})); PADDLE_ENFORCE(
CompleteAddOp(false); platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed));
platform::dynload::curandGenerateNormal(
g, data, framework::product(tensor->dims()), mean, std);
} }
}; };
class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FullyConnectedOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input of fc operator");
AddInput("W", "the weight of fc operator");
AddInput("b", "the bias of fc operator");
AddOutput("Y", "the output of fc operator");
AddOutput("before_act", "the before activation output of fc operator")
.SetTemporary();
AddAttr<std::string>("activation", "The activation key for fc layer")
.SetDefault("sigmoid")
.InEnum({"sigmoid", "softmax"});
//! TODO(yuyang18): Complete comment;
AddComment("FullyConnected Operator");
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
USE_OP(mul);
USE_OP(rowwise_add);
USE_OP(sigmoid);
USE_OP(softmax);
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
...@@ -41,7 +41,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -41,7 +41,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
class MeanGradOp : public framework::OperatorWithKernel { class MeanGradOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const framework::InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<Tensor>("X" + framework::kGradVarSuffix) ctx.Output<Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("X")->dims()); ->Resize(ctx.Input<Tensor>("X")->dims());
} }
}; };
......
...@@ -48,10 +48,10 @@ template <typename Place, typename T> ...@@ -48,10 +48,10 @@ template <typename Place, typename T>
class MeanGradKernel : public framework::OpKernel { class MeanGradKernel : public framework::OpKernel {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto OG = context.Input<Tensor>("Out" + framework::kGradVarSuffix); auto OG = context.Input<Tensor>(framework::GradVarName("Out"));
PADDLE_ENFORCE(framework::product(OG->dims()) == 1, PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
"Mean Gradient should be scalar"); "Mean Gradient should be scalar");
auto IG = context.Output<Tensor>("X" + framework::kGradVarSuffix); auto IG = context.Output<Tensor>(framework::GradVarName("X"));
IG->mutable_data<T>(context.GetPlace()); IG->mutable_data<T>(context.GetPlace());
T ig_size = (T)framework::product(IG->dims()); T ig_size = (T)framework::product(IG->dims());
......
...@@ -395,4 +395,4 @@ TEST(RecurrentOp, LinkMemories) { ...@@ -395,4 +395,4 @@ TEST(RecurrentOp, LinkMemories) {
USE_OP(add_two); USE_OP(add_two);
USE_OP(mul); USE_OP(mul);
USE_OP_WITHOUT_KERNEL(recurrent_op); USE_OP_ITSELF(recurrent_op);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/framework/op_registry.h>
USE_OP(sgd);
TEST(SGDOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos();
auto it = protos.find("sgd");
ASSERT_NE(it, protos.end());
}
...@@ -51,7 +51,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { ...@@ -51,7 +51,8 @@ class GPUUniformRandomKernel : public framework::OpKernel {
unsigned int seed = unsigned int seed =
static_cast<unsigned int>(context.op_.GetAttr<int>("seed")); static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
if (seed == 0) { if (seed == 0) {
seed = std::random_device()(); std::random_device rd;
seed = rd();
} }
T min = static_cast<T>(context.op_.GetAttr<float>("min")); T min = static_cast<T>(context.op_.GetAttr<float>("min"));
T max = static_cast<T>(context.op_.GetAttr<float>("max")); T max = static_cast<T>(context.op_.GetAttr<float>("max"));
......
...@@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) ...@@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
add_subdirectory(dynload) add_subdirectory(dynload)
cc_test(enforce_test SRCS enforce_test.cc) cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece)
IF(WITH_GPU) IF(WITH_GPU)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader) set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
......
...@@ -50,8 +50,8 @@ extern void *cublas_dso_handle; ...@@ -50,8 +50,8 @@ extern void *cublas_dso_handle;
#else #else
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
inline template <typename... Args> \ template <typename... Args> \
cublasStatus_t operator()(Args... args) { \ inline cublasStatus_t operator()(Args... args) { \
return __name(args...); \ return __name(args...); \
} \ } \
}; \ }; \
......
...@@ -55,6 +55,7 @@ extern void *curand_dso_handle; ...@@ -55,6 +55,7 @@ extern void *curand_dso_handle;
__macro(curandSetPseudoRandomGeneratorSeed); \ __macro(curandSetPseudoRandomGeneratorSeed); \
__macro(curandGenerateUniform); \ __macro(curandGenerateUniform); \
__macro(curandGenerateUniformDouble); \ __macro(curandGenerateUniformDouble); \
__macro(curandGenerateNormal); \
__macro(curandDestroyGenerator); __macro(curandDestroyGenerator);
CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
......
...@@ -15,11 +15,12 @@ limitations under the License. */ ...@@ -15,11 +15,12 @@ limitations under the License. */
#pragma once #pragma once
#include <execinfo.h> #include <execinfo.h>
#include <paddle/string/printf.h>
#include <iomanip> #include <iomanip>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include "paddle/string/printf.h"
#include "paddle/string/to_string.h"
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
...@@ -194,8 +195,8 @@ inline void throw_on_error(T e) { ...@@ -194,8 +195,8 @@ inline void throw_on_error(T e) {
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \
"enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
#__VAL0, #__VAL1, std::to_string(__VAL0), \ #__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \
std::to_string(__VAL1), \ paddle::string::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__)); paddle::string::Sprintf("" __VA_ARGS__));
} // namespace platform } // namespace platform
......
...@@ -9,10 +9,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,10 +9,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <array>
#include <iostream>
#include <memory> #include <memory>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/string/piece.h"
using StringPiece = paddle::string::Piece;
using paddle::string::HasPrefix;
TEST(ENFORCE, OK) { TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
...@@ -22,19 +28,15 @@ TEST(ENFORCE, OK) { ...@@ -22,19 +28,15 @@ TEST(ENFORCE, OK) {
} }
TEST(ENFORCE, FAILED) { TEST(ENFORCE, FAILED) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
// your error handling code here caught_exception = true;
in_catch = true; EXPECT_TRUE(
std::string msg = "Enforce is not ok 123 at all"; HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all"));
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
ASSERT_TRUE(in_catch); EXPECT_TRUE(caught_exception);
} }
TEST(ENFORCE, NO_ARG_OK) { TEST(ENFORCE, NO_ARG_OK) {
...@@ -47,41 +49,27 @@ TEST(ENFORCE, NO_ARG_OK) { ...@@ -47,41 +49,27 @@ TEST(ENFORCE, NO_ARG_OK) {
TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) {
int a = 2; int a = 2;
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_EQ(a, 1 + 3); PADDLE_ENFORCE_EQ(a, 1 + 3);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4");
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) {
int a = 2; int a = 2;
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their");
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = HasPrefix(StringPiece(error.what()),
"enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match");
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
} }
} EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_NE, OK) { TEST(ENFORCE_NE, OK) {
...@@ -89,42 +77,32 @@ TEST(ENFORCE_NE, OK) { ...@@ -89,42 +77,32 @@ TEST(ENFORCE_NE, OK) {
PADDLE_ENFORCE_NE(1.0, 2UL); PADDLE_ENFORCE_NE(1.0, 2UL);
} }
TEST(ENFORCE_NE, FAIL) { TEST(ENFORCE_NE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
// 2UL here to check data type compatible // 2UL here to check data type compatible
PADDLE_ENFORCE_NE(1.0, 1UL); PADDLE_ENFORCE_NE(1.0, 1UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
const char* what = error.what(); "enforce 1.0 != 1UL failed, 1 == 1"))
for (size_t i = 0; i < msg.length(); ++i) { << error.what() << " does not have expected prefix";
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); }
TEST(ENFORCE_GT, FAIL) { TEST(ENFORCE_GT, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
// 2UL here to check data type compatible
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
} }
} EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_GE, OK) { TEST(ENFORCE_GE, OK) {
...@@ -134,21 +112,16 @@ TEST(ENFORCE_GE, OK) { ...@@ -134,21 +112,16 @@ TEST(ENFORCE_GE, OK) {
PADDLE_ENFORCE_GE(3.21, 2UL); PADDLE_ENFORCE_GE(3.21, 2UL);
} }
TEST(ENFORCE_GE, FAIL) { TEST(ENFORCE_GE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_GE(1, 2UL); PADDLE_ENFORCE_GE(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
} }
} EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_LE, OK) { TEST(ENFORCE_LE, OK) {
...@@ -159,21 +132,16 @@ TEST(ENFORCE_LE, OK) { ...@@ -159,21 +132,16 @@ TEST(ENFORCE_LE, OK) {
PADDLE_ENFORCE_LE(2UL, 3.2); PADDLE_ENFORCE_LE(2UL, 3.2);
} }
TEST(ENFORCE_LE, FAIL) { TEST(ENFORCE_LE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_LT, OK) { TEST(ENFORCE_LT, OK) {
...@@ -182,21 +150,15 @@ TEST(ENFORCE_LT, OK) { ...@@ -182,21 +150,15 @@ TEST(ENFORCE_LT, OK) {
PADDLE_ENFORCE_LT(2UL, 3); PADDLE_ENFORCE_LT(2UL, 3);
} }
TEST(ENFORCE_LT, FAIL) { TEST(ENFORCE_LT, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_LT(1UL, 0.12); PADDLE_ENFORCE_LT(1UL, 0.12);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
const char* what = error.what(); "enforce 1UL < 0.12 failed, 1 >= 0.12"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
} }
} EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_NOT_NULL, OK) { TEST(ENFORCE_NOT_NULL, OK) {
...@@ -205,20 +167,50 @@ TEST(ENFORCE_NOT_NULL, OK) { ...@@ -205,20 +167,50 @@ TEST(ENFORCE_NOT_NULL, OK) {
delete a; delete a;
} }
TEST(ENFORCE_NOT_NULL, FAIL) { TEST(ENFORCE_NOT_NULL, FAIL) {
bool in_catch = false; bool caught_exception = false;
int* a{nullptr};
try { try {
int* a = nullptr;
PADDLE_ENFORCE_NOT_NULL(a); PADDLE_ENFORCE_NOT_NULL(a);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "a should not be null"; EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "a should not be null"));
const char* what = error.what(); }
for (size_t i = 0; i < msg.length(); ++i) { EXPECT_TRUE(caught_exception);
ASSERT_EQ(what[i], msg[i]); }
struct Dims {
size_t dims_[4];
bool operator==(const Dims& o) const {
for (size_t i = 0; i < 4; ++i) {
if (dims_[i] != o.dims_[i]) return false;
}
return true;
}
};
std::ostream& operator<<(std::ostream& os, const Dims& d) {
for (size_t i = 0; i < 4; ++i) {
if (i == 0) {
os << "[";
} }
os << d.dims_[i];
if (i == 4 - 1) {
os << "]";
} else {
os << ", ";
} }
}
return os;
}
TEST(ENFORCE_USER_DEFINED_CLASS, EQ) {
Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}};
PADDLE_ENFORCE_EQ(a, b);
}
ASSERT_TRUE(in_catch); TEST(ENFORCE_USER_DEFINED_CLASS, NE) {
Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}};
ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet);
} }
\ No newline at end of file
...@@ -14,8 +14,8 @@ limitations under the License. */ ...@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#include <boost/variant.hpp>
#include <iostream> #include <iostream>
#include "paddle/platform/variant.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <boost/config.hpp>
#ifndef PADDLE_ONLY_CPU
// Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc.
// Define BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same
// function symbols.
//
// https://github.com/PaddlePaddle/Paddle/issues/3386
#ifndef BOOST_NO_CXX11_VARIADIC_TEMPLATES
#define BOOST_NO_CXX11_VARIADIC_TEMPLATES
#endif
#endif
#include <boost/variant.hpp>
...@@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test ...@@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test
SocketTest.cpp) SocketTest.cpp)
add_test(NAME socket_test add_test(NAME socket_test
COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port
${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10)
####################### test_ProtoServer #################### ####################### test_ProtoServer ####################
...@@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer ...@@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer
IF(NOT ON_TRAVIS) IF(NOT ON_TRAVIS)
add_test(NAME test_ProtoServer add_test(NAME test_ProtoServer
COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port
${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer)
ENDIF(NOT ON_TRAVIS) ENDIF(NOT ON_TRAVIS)
...@@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS) ...@@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS)
add_unittest_without_exec(test_ParameterServer2 add_unittest_without_exec(test_ParameterServer2
test_ParameterServer2.cpp) test_ParameterServer2.cpp)
add_test(NAME test_ParameterServer2 add_test(NAME test_ParameterServer2
COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port -n 4 COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4
${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2)
...@@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ... ...@@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ...
-DWITH_DOC=OFF -DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF} -DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-ON}
-DWITH_SWIG_PY=ON -DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF} -DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_PYTHON=${WITH_PYTHON:-ON}
...@@ -51,7 +51,7 @@ cmake .. \ ...@@ -51,7 +51,7 @@ cmake .. \
-DWITH_DOC=OFF \ -DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-ON} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_C_API=${WITH_C_API:-OFF} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \
...@@ -74,11 +74,11 @@ cat <<EOF ...@@ -74,11 +74,11 @@ cat <<EOF
Running unit tests ... Running unit tests ...
======================================== ========================================
EOF EOF
ctest --output-on-failure
# make install should also be test when unittest # make install should also be test when unittest
make install -j `nproc` make install -j `nproc`
pip install /usr/local/opt/paddle/share/wheels/*.whl pip install /usr/local/opt/paddle/share/wheels/*.whl
paddle version paddle version
ctest --output-on-failure
fi fi
...@@ -130,7 +130,7 @@ fi ...@@ -130,7 +130,7 @@ fi
# generate deb package for current build # generate deb package for current build
# FIXME(typhoonzero): should we remove paddle/scripts/deb ? # FIXME(typhoonzero): should we remove paddle/scripts/deb ?
if [[ ${WITH_DEB:-OFF} == "ON" ]]; then if [[ ${WITH_DEB:-ON} == "ON" ]]; then
cat <<EOF cat <<EOF
======================================== ========================================
Generating .deb package ... Generating .deb package ...
......
...@@ -2,3 +2,4 @@ cc_library(stringpiece SRCS piece.cc) ...@@ -2,3 +2,4 @@ cc_library(stringpiece SRCS piece.cc)
cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags)
cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags)
cc_test(to_string_test SRCS to_string_test.cc)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <sstream>
#include <string>
namespace paddle {
namespace string {
template <typename T>
inline std::string to_string(T v) {
std::ostringstream sout;
sout << v;
return sout.str();
}
// Faster std::string/const char* type
template <>
inline std::string to_string(std::string v) {
return v;
}
template <>
inline std::string to_string(const char* v) {
return std::string(v);
}
} // namespace string
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/string/to_string.h"
#include <gtest/gtest.h>
constexpr char kOutputString[] = "User Defined Output";
class UserDefinedClass {
public:
};
std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) {
s << kOutputString;
return s;
}
TEST(to_string, normal) {
using namespace paddle::string;
ASSERT_EQ("10", to_string(10));
ASSERT_EQ("abc", to_string("abc"));
ASSERT_EQ("1.2", to_string(1.2));
}
TEST(to_string, user_defined) {
using namespace paddle::string;
UserDefinedClass instance;
ASSERT_EQ(kOutputString, to_string(instance));
}
\ No newline at end of file
...@@ -50,8 +50,8 @@ void NewRemoteParameterUpdater::init( ...@@ -50,8 +50,8 @@ void NewRemoteParameterUpdater::init(
// create parameter server client. // create parameter server client.
if (useEtcd_) { if (useEtcd_) {
parameterClient_ = paddle_new_etcd_pserver_client( parameterClient_ =
(char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); paddle_new_etcd_pserver_client((char *)pserverSpec_.c_str());
} else { } else {
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0); FLAGS_trainer_id == 0);
...@@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init( ...@@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init(
// from parameter server // from parameter server
if (paddle_begin_init_params(parameterClient_)) { if (paddle_begin_init_params(parameterClient_)) {
LOG(INFO) << "paddle_begin_init_params start"; LOG(INFO) << "paddle_begin_init_params start";
// NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig.
// This makes golang pserver compatible with handy V1 demos.
// TODO: Refine or remove these ugly converting lines
OptimizerConfig optimizerConfigV2;
if (trainerConfig_.learning_method() == "momentum") {
optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
} else if (trainerConfig_.learning_method() == "adagrad") {
optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad);
optimizerConfigV2.mutable_adagrad()->set_epsilon(
trainerConfig_.ada_epsilon());
} else if (trainerConfig_.learning_method() == "adadelta") {
optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad);
optimizerConfigV2.mutable_adadelta()->set_epsilon(
trainerConfig_.ada_epsilon());
optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou());
} else if (trainerConfig_.learning_method() == "adam") {
optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam);
optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1());
optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2());
optimizerConfigV2.mutable_adam()->set_epsilon(
trainerConfig_.adam_epsilon());
} else {
LOG(ERROR) << "got unsupported v1 optimizer config: "
<< trainerConfig_.learning_method();
optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
}
if (trainerConfig_.learning_rate_schedule() == "constant") {
optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const);
optimizerConfigV2.mutable_const_lr()->set_learning_rate(
trainerConfig_.learning_rate());
} else if (trainerConfig_.learning_rate_schedule() == "linear") {
optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear);
optimizerConfigV2.mutable_linear_lr()->set_learning_rate(
trainerConfig_.learning_rate());
optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a(
trainerConfig_.learning_rate_decay_a());
optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b(
trainerConfig_.learning_rate_decay_b());
} else {
LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: "
<< trainerConfig_.learning_rate_schedule() << ", set to const";
optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const);
}
// overwrite optimizerConfigV2 for per-parameter(layer) configs
for (int i = 0; i < parameterSize(); ++i) { for (int i = 0; i < parameterSize(); ++i) {
auto paramConfig = parameters_[i]->getConfig(); auto paramConfig = parameters_[i]->getConfig();
LOG(INFO) << "old param config: " << paramConfig.DebugString(); if (paramConfig.has_momentum() &&
// FIXME(typhoonzero): convert old paramConfig to optimizerConfig trainerConfig_.learning_method() == "momentum") {
OptimizerConfig optimizeConfigV2; optimizerConfigV2.mutable_sgd()->set_momentum(paramConfig.momentum());
auto sgdConfigV2 = optimizeConfigV2.mutable_sgd(); }
sgdConfigV2->set_momentum(paramConfig.momentum());
sgdConfigV2->set_decay(paramConfig.decay_rate());
optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const);
auto constlr = optimizeConfigV2.mutable_const_lr();
if (paramConfig.has_learning_rate()) { if (paramConfig.has_learning_rate()) {
constlr->set_learning_rate(paramConfig.learning_rate()); switch (optimizerConfigV2.lr_policy()) {
} else { case 0:
constlr->set_learning_rate(trainerConfig_.learning_rate()); optimizerConfigV2.mutable_const_lr()->set_learning_rate(
paramConfig.learning_rate());
break;
case 1:
optimizerConfigV2.mutable_linear_lr()->set_learning_rate(
paramConfig.learning_rate());
break;
}
}
if (paramConfig.has_decay_rate()) {
switch (optimizerConfigV2.optimizer()) {
case 1: // SGD
optimizerConfigV2.mutable_sgd()->set_decay(
paramConfig.decay_rate());
break;
case 2: // Adadelta
optimizerConfigV2.mutable_adadelta()->set_decay(
paramConfig.decay_rate());
break;
case 3: // Adagrad
optimizerConfigV2.mutable_adagrad()->set_decay(
paramConfig.decay_rate());
break;
case 4: // Adam
optimizerConfigV2.mutable_adam()->set_decay(
paramConfig.decay_rate());
break;
} }
if (trainerConfig_.algorithm() == "sgd") {
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
// FIXME: config all algorithms
} else {
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
} }
std::string bytes = optimizeConfigV2.SerializeAsString(); // send param and config to pserver
std::string bytes = optimizerConfigV2.SerializeAsString();
const char *array = bytes.data(); const char *array = bytes.data();
int size = (int)bytes.size(); int size = (int)bytes.size();
paddle_init_param( paddle_init_param(
......
...@@ -28,6 +28,8 @@ DECLARE_bool(with_cost); ...@@ -28,6 +28,8 @@ DECLARE_bool(with_cost);
DECLARE_bool(with_gpu); DECLARE_bool(with_gpu);
DECLARE_bool(parallel_nn); DECLARE_bool(parallel_nn);
DECLARE_string(config_args); DECLARE_string(config_args);
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserModuleName = "paddle.trainer.config_parser";
const char *kConfigParserFuncName = "parse_config_and_serialize"; const char *kConfigParserFuncName = "parse_config_and_serialize";
...@@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) ...@@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath)
configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local
<< ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu
<< ",parallel_nn=" << FLAGS_parallel_nn << ",parallel_nn=" << FLAGS_parallel_nn
<< ",use_mkldnn=" << FLAGS_use_mkldnn
<< ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt
<< ",cudnn_version=" << hl_get_cudnn_lib_version(); << ",cudnn_version=" << hl_get_cudnn_lib_version();
if (!FLAGS_config_args.empty()) { if (!FLAGS_config_args.empty()) {
configArgs << "," << FLAGS_config_args; configArgs << "," << FLAGS_config_args;
......
...@@ -2,19 +2,19 @@ ...@@ -2,19 +2,19 @@
add_unittest_without_exec(test_Compare add_unittest_without_exec(test_Compare
test_Compare.cpp) test_Compare.cpp)
add_test(NAME test_Compare add_test(NAME test_Compare
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python
${CMAKE_CURRENT_BINARY_DIR}/test_Compare ${CMAKE_CURRENT_BINARY_DIR}/test_Compare
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
################# test_Trainer ########################### ################# test_Trainer ###########################
add_unittest_without_exec(test_Trainer add_unittest_without_exec(test_Trainer
test_Trainer.cpp) test_Trainer.cpp)
add_test(NAME test_Trainer add_test(NAME test_Trainer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/gen_proto_data.py && ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py &&
${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_Trainer ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
############### test_TrainerOnePass ########################## ############### test_TrainerOnePass ##########################
if(WITH_PYTHON) if(WITH_PYTHON)
...@@ -23,60 +23,60 @@ if(WITH_PYTHON) ...@@ -23,60 +23,60 @@ if(WITH_PYTHON)
add_unittest_without_exec(test_TrainerOnePass add_unittest_without_exec(test_TrainerOnePass
test_TrainerOnePass.cpp) test_TrainerOnePass.cpp)
add_test(NAME test_TrainerOnePass add_test(NAME test_TrainerOnePass
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests
${PROJ_ROOT}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
endif() endif()
################ test_CompareTwoNets ###################### ################ test_CompareTwoNets ######################
add_unittest_without_exec(test_CompareTwoNets add_unittest_without_exec(test_CompareTwoNets
test_CompareTwoNets.cpp) test_CompareTwoNets.cpp)
add_test(NAME test_CompareTwoNets add_test(NAME test_CompareTwoNets
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
--config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
############### test_CompareTwoOpts ################### ############### test_CompareTwoOpts ###################
add_unittest_without_exec(test_CompareTwoOpts add_unittest_without_exec(test_CompareTwoOpts
test_CompareTwoOpts.cpp) test_CompareTwoOpts.cpp)
add_test(NAME test_CompareTwoOpts add_test(NAME test_CompareTwoOpts
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts
--config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
--num_passes=1 --need_high_accuracy=0 --num_passes=1 --need_high_accuracy=0
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
################# test_CompareSparse ################## ################# test_CompareSparse ##################
add_unittest_without_exec(test_CompareSparse add_unittest_without_exec(test_CompareSparse
test_CompareSparse.cpp) test_CompareSparse.cpp)
if(NOT ON_TRAVIS) if(NOT ON_TRAVIS)
add_test(NAME test_CompareSparse add_test(NAME test_CompareSparse
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
./.set_port.sh -p port -n 6 ./.set_port.sh -p port -n 6
${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
endif() endif()
################# test_recurrent_machine_generation ############### ################# test_recurrent_machine_generation ###############
add_unittest_without_exec(test_recurrent_machine_generation add_unittest_without_exec(test_recurrent_machine_generation
test_recurrent_machine_generation.cpp) test_recurrent_machine_generation.cpp)
add_test(NAME test_recurrent_machine_generation add_test(NAME test_recurrent_machine_generation
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation ${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
#################### test_PyDataProviderWrapper ######################### #################### test_PyDataProviderWrapper #########################
add_unittest_without_exec(test_PyDataProviderWrapper add_unittest_without_exec(test_PyDataProviderWrapper
test_PyDataProviderWrapper.cpp) test_PyDataProviderWrapper.cpp)
add_test(NAME test_PyDataProviderWrapper add_test(NAME test_PyDataProviderWrapper
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests
${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
#################### test_config_parser ######################### #################### test_config_parser #########################
add_test(NAME test_config_parser add_test(NAME test_config_parser
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/config_parser_test.py ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
...@@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); ...@@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training");
DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
#endif #endif
#ifdef PADDLE_USE_MKLDNN
// TODO(TJ): change to true when MKLDNN layers support multi-inputs
DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training");
#else
DEFINE_bool(use_mkldnn, false, "Only support CPU training");
#endif
DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight");
DEFINE_bool(parallel_nn, DEFINE_bool(parallel_nn,
false, false,
"Whether to use multi-threads to calculate one neural network." "Whether to use multi-threads to calculate one neural network."
......
...@@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat); ...@@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat);
DECLARE_string(predict_file); DECLARE_string(predict_file);
DECLARE_bool(prev_batch_state); DECLARE_bool(prev_batch_state);
DECLARE_string(init_model_path); DECLARE_string(init_model_path);
DECLARE_bool(use_mkldnn);
DECLARE_bool(use_mkldnn_wgt);
...@@ -13,6 +13,6 @@ add_executable( ...@@ -13,6 +13,6 @@ add_executable(
link_paddle_exe(test_CustomStackTracePrint) link_paddle_exe(test_CustomStackTracePrint)
if(NOT APPLE) if(NOT APPLE)
add_test(NAME test_CustomStackTracePrint add_test(NAME test_CustomStackTracePrint
COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
...@@ -9,13 +9,13 @@ foreach(filename ${proto_filenames}) ...@@ -9,13 +9,13 @@ foreach(filename ${proto_filenames})
get_filename_component(ABS_FIL ${filename} ABSOLUTE) get_filename_component(ABS_FIL ${filename} ABSOLUTE)
get_filename_component(FIL_WE ${filename} NAME_WE) get_filename_component(FIL_WE ${filename} NAME_WE)
set(CUR_PROTO_GEN_PY set(CUR_PROTO_GEN_PY
${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) ${PADDLE_SOURCE_DIR}/paddle/python/paddle/proto/${FIL_WE}_pb2.py)
set(PROTO_GEN_PY set(PROTO_GEN_PY
${CUR_PROTO_GEN_PY} ${CUR_PROTO_GEN_PY}
${PROTO_GEN_PY}) ${PROTO_GEN_PY})
add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY}
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" ARGS "--python_out=${PADDLE_SOURCE_DIR}/python/paddle/proto"
"-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}
DEPENDS ${ABS_FIL} protoc) DEPENDS ${ABS_FIL} protoc)
endforeach() endforeach()
......
set(OUTPUT_DIR
"${CMAKE_CURRENT_BINARY_DIR}/build")
file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
...@@ -18,7 +16,7 @@ SET(COPY_PADDLE_MASTER "") ...@@ -18,7 +16,7 @@ SET(COPY_PADDLE_MASTER "")
if(WITH_GOLANG) if(WITH_GOLANG)
SET(COPY_PADDLE_MASTER "copy_paddle_master") SET(COPY_PADDLE_MASTER "copy_paddle_master")
add_custom_command(TARGET ${COPY_PADDLE_MASTER} add_custom_command(TARGET ${COPY_PADDLE_MASTER}
COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ COMMAND cp ${paddle_master_LIB_PATH} ${PADDLE_SOURCE_DIR}/python/paddle/v2/master/
) )
add_dependencies(copy_paddle_master paddle_master) add_dependencies(copy_paddle_master paddle_master)
endif(WITH_GOLANG) endif(WITH_GOLANG)
...@@ -27,19 +25,21 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -27,19 +25,21 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PROJ_ROOT}/python/paddle/v2/framework/core.so COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so
DEPENDS paddle_pybind) DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so)
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel)
set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
......
...@@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): ...@@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
@config_layer('fc') @config_layer('fc')
class FCLayer(LayerBase): class FCLayer(LayerBase):
layer_type = 'fc'
def __init__(self, def __init__(self,
name, name,
size, size,
...@@ -1611,14 +1613,27 @@ class FCLayer(LayerBase): ...@@ -1611,14 +1613,27 @@ class FCLayer(LayerBase):
bias=True, bias=True,
error_clipping_threshold=None, error_clipping_threshold=None,
**xargs): **xargs):
super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
use_mkldnn_wgt = bool(
int(g_command_config_args.get("use_mkldnn_wgt", 0)))
if use_mkldnn:
self.layer_type = 'mkldnn_fc'
config_assert(
len(inputs) == 1,
"MkldnnFCLayer support one and only one input!")
super(FCLayer, self).__init__(
name, self.layer_type, size, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
psize = self.config.size * input_layer.size psize = self.config.size * input_layer.size
dims = [input_layer.size, self.config.size] dims = [input_layer.size, self.config.size]
format = self.inputs[input_index].format format = self.inputs[input_index].format
sparse = format == "csr" or format == "csc" sparse = format == "csr" or format == "csc"
if use_mkldnn:
config_assert(not sparse,
"MkldnnFCLayer do not support sparse format yet")
if use_mkldnn_wgt:
dims = [self.config.size, input_layer.size]
if sparse: if sparse:
psize = self.inputs[input_index].nnz psize = self.inputs[input_index].nnz
else: else:
...@@ -1631,6 +1646,11 @@ class FCLayer(LayerBase): ...@@ -1631,6 +1646,11 @@ class FCLayer(LayerBase):
self.config.error_clipping_threshold = error_clipping_threshold self.config.error_clipping_threshold = error_clipping_threshold
@config_layer('mkldnn_fc')
class MkldnnFcLayer(FCLayer):
layer_type = 'mkldnn_fc'
@config_layer('selective_fc') @config_layer('selective_fc')
class SelectiveFCLayer(LayerBase): class SelectiveFCLayer(LayerBase):
def __init__(self, def __init__(self,
......
#################### test_config_parser ######################### #################### test_config_parser #########################
add_test(NAME layers_test add_test(NAME layers_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_test(NAME test_reset_hook add_test(NAME test_reset_hook
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp) add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
add_test(NAME test_layerHelpers add_test(NAME test_layerHelpers
COMMAND COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
) )
py_test(test_net SRCS test_net.py) py_test(test_net SRCS test_net.py)
py_test(test_fc_op SRCS test_fc_op.py)
py_test(test_scope SRCS test_scope.py) py_test(test_scope SRCS test_scope.py)
py_test(test_tensor SRCS test_tensor.py) py_test(test_tensor SRCS test_tensor.py)
...@@ -21,5 +20,7 @@ py_test(gradient_checker SRCS gradient_checker.py) ...@@ -21,5 +20,7 @@ py_test(gradient_checker SRCS gradient_checker.py)
py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
py_test(test_operator SRCS test_operator.py) py_test(test_operator SRCS test_operator.py)
# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py)
...@@ -92,15 +92,27 @@ def get_numeric_gradient(op, ...@@ -92,15 +92,27 @@ def get_numeric_gradient(op,
class GradientChecker(unittest.TestCase): class GradientChecker(unittest.TestCase):
def __is_close(self, numeric_grads, scope, max_relative_error): def assert_is_close(self, numeric_grads, scope, max_relative_error,
msg_prefix):
for name in numeric_grads: for name in numeric_grads:
op_grad = numpy.array( b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
scope.find_var(grad_var_name(name)).get_tensor()) a = numeric_grads[name]
is_close = numpy.allclose(
numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) abs_a = numpy.abs(a)
if not is_close: # if abs_a is nearly zero, then use abs error for a, not relative
return False # error.
return True abs_a[abs_a < 1e-3] = 1
diff_mat = numpy.abs(a - b) / abs_a
max_diff = numpy.max(diff_mat)
def err_msg():
offset = numpy.argmax(diff_mat > max_relative_error)
return "%s Variable %s max gradient diff %f over limit %f, the first " \
"error element is %d" % (
msg_prefix, name, max_diff, max_relative_error, offset)
self.assertLessEqual(max_diff, max_relative_error, err_msg())
def check_grad(self, def check_grad(self,
forward_op, forward_op,
...@@ -145,7 +157,8 @@ class GradientChecker(unittest.TestCase): ...@@ -145,7 +157,8 @@ class GradientChecker(unittest.TestCase):
# get numeric gradient # get numeric gradient
for check_name in inputs_to_check: for check_name in inputs_to_check:
numeric_grad[check_name] = \ numeric_grad[check_name] = \
get_numeric_gradient(forward_op, input_vars, output_name, check_name) get_numeric_gradient(forward_op, input_vars, output_name,
check_name)
# get operator gradient according to different device # get operator gradient according to different device
for place in places: for place in places:
...@@ -187,15 +200,8 @@ class GradientChecker(unittest.TestCase): ...@@ -187,15 +200,8 @@ class GradientChecker(unittest.TestCase):
backward_op.infer_shape(scope) backward_op.infer_shape(scope)
backward_op.run(scope, ctx) backward_op.run(scope, ctx)
if isinstance(place, core.CPUPlace): self.assert_is_close(numeric_grad, scope, max_relative_error,
msg = "CPU kernel gradient is not close to numeric gradient" "Gradient Check On %s" % str(place))
else:
if isinstance(place, core.GPUPlace):
msg = "GPU kernel gradient is not close to numeric gradient"
else:
raise ValueError("unknown place " + type(place))
self.assertTrue(
self.__is_close(numeric_grad, scope, max_relative_error), msg)
if __name__ == '__main__': if __name__ == '__main__':
......
import paddle.v2.framework.core as core
import unittest
import numpy
from paddle.v2.framework.op import Operator
class TestFc(unittest.TestCase):
def test_fc(self):
scope = core.Scope()
place = core.CPUPlace()
x = scope.new_var("X")
x_tensor = x.get_tensor()
x_tensor.set_dims([1000, 784])
x_tensor.alloc_float(place)
w = scope.new_var("W")
w_tensor = w.get_tensor()
w_tensor.set_dims([784, 100])
w_tensor.alloc_float(place)
w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place)
# Set a real numpy array here.
# x_tensor.set(numpy.array([]))
op = Operator("fc", X="X", Y="Y", W="W")
for out in op.outputs():
if scope.find_var(out) is None:
scope.new_var(out).get_tensor()
tensor = scope.find_var("Y").get_tensor()
op.infer_shape(scope)
self.assertEqual([1000, 100], tensor.shape())
ctx = core.DeviceContext.create(place)
op.run(scope, ctx)
# After complete all ops, check Y is expect or not.
if __name__ == '__main__':
unittest.main()
import unittest
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import numpy
class GaussianRandomTest(unittest.TestCase):
def test_cpu(self):
self.gaussian_random_test(place=core.CPUPlace())
def test_gpu(self):
if core.is_compile_gpu():
self.gaussian_random_test(place=core.GPUPlace(0))
def gaussian_random_test(self, place):
scope = core.Scope()
scope.new_var("Out").get_tensor()
op = Operator(
"gaussian_random",
Out="Out",
dims=[1000, 784],
mean=.0,
std=1.,
seed=10)
op.infer_shape(scope)
context = core.DeviceContext.create(place)
op.run(scope, context)
tensor = numpy.array(scope.find_var("Out").get_tensor())
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
if __name__ == '__main__':
unittest.main()
...@@ -3,6 +3,15 @@ from paddle.v2.framework.op import Operator ...@@ -3,6 +3,15 @@ from paddle.v2.framework.op import Operator
import unittest import unittest
def fc(X, W, Y):
ret_v = core.Net.create()
ret_v.add_op(Operator("mul", X="X", Y="W", Out="pre_activation"))
ret_v.add_op(Operator("sigmoid", X="pre_activation", Y=Y))
ret_v.complete_add_op(True)
return ret_v
class TestNet(unittest.TestCase): class TestNet(unittest.TestCase):
def test_net_all(self): def test_net_all(self):
net = core.Net.create() net = core.Net.create()
...@@ -10,18 +19,18 @@ class TestNet(unittest.TestCase): ...@@ -10,18 +19,18 @@ class TestNet(unittest.TestCase):
net.add_op(op1) net.add_op(op1)
net2 = core.Net.create() net2 = core.Net.create()
net2.add_op(Operator("fc", X="X", W="w", Y="fc.out")) net2.add_op(fc(X="X", W="w", Y="fc.out"))
net2.complete_add_op(True) net2.complete_add_op(True)
net.add_op(net2) net.add_op(net2)
net.complete_add_op(True) net.complete_add_op(True)
expected = ''' expected = '''
Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). Op(plain_net), inputs:(W, X, Y), outputs:(Out, fc.out, pre_activation).
Op(add_two), inputs:(X, Y), outputs:(Out). Op(add_two), inputs:(X, Y), outputs:(Out).
Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation).
Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation).
Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). Op(mul), inputs:(X, W), outputs:(pre_activation).
Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). Op(sigmoid), inputs:(pre_activation), outputs:(fc.out).
''' '''
self.assertEqual(expected, "\n" + str(net)) self.assertEqual(expected, "\n" + str(net))
......
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
import paddle.trainer_config_helpers.optimizers as v1_optimizers #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
Optimizers(update equation) for SGD method. Optimizers(update equation) for SGD method.
TODO(zhihong) : create new optimizer with proto config, add new optimizer here
TODO(yuyang18): Complete comments. TODO(yuyang18): Complete comments.
""" """
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers
from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig
__all__ = [ __all__ = [
'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
'RMSProp', 'ModelAverage', 'L2Regularization' 'RMSProp', 'ModelAverage', 'L2Regularization'
...@@ -70,7 +83,8 @@ class Optimizer(object): ...@@ -70,7 +83,8 @@ class Optimizer(object):
gradient_machine.prefetch(in_args) gradient_machine.prefetch(in_args)
parameter_updater.getParametersRemote() parameter_updater.getParametersRemote()
:param pserver_spec: pserver location, eg: localhost:3000 :param pserver_spec: pserver location, eg: localhost:3000, if use etcd,
pserver_spec should be the etcd endpoints, eg: http://localhost:2379
:return: parameter_updater :return: parameter_updater
""" """
if is_local: if is_local:
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np import numpy as np
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
...@@ -113,16 +127,7 @@ class Parameters(object): ...@@ -113,16 +127,7 @@ class Parameters(object):
""" """
return iter(self.__param_conf__) return iter(self.__param_conf__)
def __getitem__(self, key): def __getter_inner(self, key, param_type):
"""
Get parameter by parameter name. It uses Python dict syntax.
:note: It will always copy the parameter from C++ side.
:param key: Parameter name
:type key: basestring
:return: parameter value
:rtype: np.ndarray
"""
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
shape = self.get_shape(key) shape = self.get_shape(key)
...@@ -138,7 +143,7 @@ class Parameters(object): ...@@ -138,7 +143,7 @@ class Parameters(object):
each_gradient_machine, key) each_gradient_machine, key)
# for simplify implementation now, we always copy from C++ # for simplify implementation now, we always copy from C++
assert isinstance(param, api.Parameter) assert isinstance(param, api.Parameter)
val = param.getBuf(api.PARAMETER_VALUE) val = param.getBuf(param_type)
assert isinstance(val, api.Vector) assert isinstance(val, api.Vector)
val = val.copyToNumpyArray() val = val.copyToNumpyArray()
return val return val
...@@ -146,6 +151,19 @@ class Parameters(object): ...@@ -146,6 +151,19 @@ class Parameters(object):
raise RuntimeError("Unexpected branch") raise RuntimeError("Unexpected branch")
def __getitem__(self, key):
"""
Get parameter by parameter name. It uses Python dict syntax.
:note: It will always copy the parameter from C++ side.
:param key: Parameter name
:type key: basestring
:return: parameter value
:rtype: np.ndarray
"""
import py_paddle.swig_paddle as api
return self.__getter_inner(key, api.PARAMETER_VALUE)
def get_shape(self, key): def get_shape(self, key):
""" """
get shape of the parameter. get shape of the parameter.
...@@ -202,6 +220,19 @@ class Parameters(object): ...@@ -202,6 +220,19 @@ class Parameters(object):
""" """
return self.__getitem__(key=parameter_name) return self.__getitem__(key=parameter_name)
def get_grad(self, key):
"""
Get grandient by parameter name.
:note: It will always copy the parameter from C++ side.
:param key: parameter name
:type key: basestring
:return: The grandient matrix.
:rtype: np.ndarray
"""
import py_paddle.swig_paddle as api
return self.__getter_inner(key, api.PARAMETER_GRADIENT)
def set(self, parameter_name, value): def set(self, parameter_name, value):
""" """
Set parameter by parameter name & matrix. Set parameter by parameter name & matrix.
...@@ -250,7 +281,13 @@ class Parameters(object): ...@@ -250,7 +281,13 @@ class Parameters(object):
size = reduce(lambda a, b: a * b, param.shape) size = reduce(lambda a, b: a * b, param.shape)
f.write(struct.pack("IIQ", 0, 4, size)) f.write(struct.pack("IIQ", 0, 4, size))
param = param.astype(np.float32) param = param.astype(np.float32)
f.write(param.tostring()) s = param.tostring()
wrote_size = 0
buf = buffer(s, wrote_size, 65535)
while buf: # f.write crashes with big data blog.
f.write(buf)
wrote_size += 65535
buf = buffer(s, wrote_size, 65535)
def deserialize(self, name, f): def deserialize(self, name, f):
""" """
......
...@@ -161,14 +161,14 @@ class SGD(object): ...@@ -161,14 +161,14 @@ class SGD(object):
self.__parameter_updater__.update(each_param) self.__parameter_updater__.update(each_param)
cost_sum = out_args.sum() cost_sum = out_args.sum()
cost = cost_sum / len(data_batch) cost = cost_sum / len(data_batch)
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
event_handler( event_handler(
v2_event.EndIteration( v2_event.EndIteration(
pass_id=pass_id, pass_id=pass_id,
batch_id=batch_id, batch_id=batch_id,
cost=cost, cost=cost,
evaluator=batch_evaluator)) evaluator=batch_evaluator))
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
self.__parameter_updater__.finishPass() self.__parameter_updater__.finishPass()
pass_evaluator.finish() pass_evaluator.finish()
......
...@@ -45,14 +45,14 @@ setup(name='paddlepaddle', ...@@ -45,14 +45,14 @@ setup(name='paddlepaddle',
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling. # The paddle.v2.framework.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework', 'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
'py_paddle': '${PROJ_ROOT}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'], scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'],
distclass=BinaryDistribution, distclass=BinaryDistribution,
data_files=[('/usr/local/opt/paddle/bin', data_files=[('/usr/local/opt/paddle/bin',
['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage', ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer', '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model', '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
'${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])] '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])]
) )
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册