diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a6b0a20e441676c85c9ed8f8ad1a6e7abdf1ea8..c7d743e193e7d32dbc0b56f3bcb05b6c61f85f1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -74,6 +76,10 @@ if(ANDROID) "Disable PYTHON when cross-compiling for Android" FORCE) set(WITH_RDMA OFF CACHE STRING "Disable RDMA when cross-compiling for Android" FORCE) + set(WITH_MKLDNN OFF CACHE STRING + "Disable MKLDNN when cross-compiling for Android" FORCE) + set(WITH_MKLML OFF CACHE STRING + "Disable MKLML package when cross-compiling for Android" FORCE) endif(ANDROID) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING @@ -87,6 +93,7 @@ endif() ######################################################################################## +include(external/mklml) # download mklml package include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog @@ -94,6 +101,7 @@ include(external/gtest) # download, build, install gtest include(external/protobuf) # download, build, install protobuf include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas +include(external/mkldnn) # download, build, install mkldnn include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any @@ -135,6 +143,10 @@ if(WITH_GPU) endif(NOT WITH_DSO) endif(WITH_GPU) +if(WITH_MKLDNN) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) +endif() + if(USE_NNPACK) include(external/nnpack) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 913f711afff3b8f9f77b8da978a3b9e7165d0077..854066fd1d205c337fbdbe08997d88251095c799 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -15,23 +15,44 @@ set(CBLAS_FOUND OFF) -## Find MKL First. -set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") -set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") +## Find MKLML First. +if(WITH_MKLML AND MKLML_INC_DIR AND MKLML_LIB) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER MKLML) + set(CBLAS_INC_DIR ${MKLML_INC_DIR}) + set(CBLAS_LIBRARIES ${MKLML_LIB}) + + add_definitions(-DPADDLE_USE_MKLML) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found cblas and lapack in MKLML " + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + return() +endif() + +## Then find MKL. 
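# Note: when WITH_MKLML=ON, the MKLML block above short-circuits CBLAS
# detection. Once cmake/external/mklml.cmake has populated MKLML_INC_DIR and
# MKLML_LIB, CBLAS_PROVIDER is set to MKLML and the remaining MKL probes below
# (and the later ATLAS/OpenBLAS ones) are skipped via return(). An
# illustrative configure line for exercising the new path (assumed usage, not
# taken from this patch):
#   cmake .. -DWITH_MKLML=ON -DWITH_MKLDNN=ON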
+set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs") +set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL") + +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_ROOT}/include + ${INTEL_MKL_ROOT}/include) +set(MKL_LIB_SEARCH_PATHS + ${MKL_ROOT}/lib + ${MKL_ROOT}/lib/intel64 + ${INTEL_MKL_ROOT}/lib + ${INTEL_MKL_ROOT}/lib/intel64) find_path(MKL_INC_DIR mkl.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_library(MKL_CORE_LIB NAMES mkl_core PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) set(CBLAS_FOUND ON) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 7afab5d5344b704a9329e313a81379032ba0cc97..69220e03fe8e337205f31cb1f45e3e19ae4f5d1e 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -67,6 +67,30 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) +if(WITH_MKLDNN) + add_definitions(-DPADDLE_USE_MKLDNN) + if (WITH_MKLML AND MKLDNN_IOMP_DIR) + message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}") + set(OPENMP_FLAGS "-fopenmp") + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") + else() + find_package(OpenMP) + if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else() + message(WARNING "Can not find OpenMP." + "Some performance features in MKLDNN may not be available") + endif() + endif() + +endif(WITH_MKLDNN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 77e06e983e9f8bfaf6320e3c67b85b692ed877fc..e3970073a1a0b946fa1db6642799719d7a9fcf4f 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -34,9 +34,15 @@ IF(WITH_TESTING) "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) ENDIF(WIN32) + IF(WITH_MKLML) + # wait for mklml downloading completed + SET(GTEST_DEPENDS ${MKLML_PROJECT}) + ENDIF() + ExternalProject_Add( extern_gtest ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${GTEST_DEPENDS} GIT_REPOSITORY "https://github.com/google/googletest.git" GIT_TAG "release-1.8.0" PREFIX ${GTEST_SOURCES_DIR} diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000000000000000000000000000000000..eff15de73f23db6dea3a7b79006bfec90d712ae5 --- /dev/null +++ b/cmake/external/mkldnn.cmake @@ -0,0 +1,72 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLDNN}) + return() +ENDIF(NOT ${WITH_MKLDNN}) + +INCLUDE(ExternalProject) + +SET(MKLDNN_PROJECT "extern_mkldnn") +SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) +SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX}) +IF(NOT "$ENV{HOME}" STREQUAL "/root") + SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}") +ENDIF() + +SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn") +SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) + +IF(WIN32) + MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF) + return() +ELSE(WIN32) + SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) + MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") + SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + #SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS + SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR}) + +IF(${CBLAS_PROVIDER} STREQUAL "MKLML") + SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) + SET(MKLDNN_MKLROOT ${MKLML_ROOT}) + SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) + SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) +ENDIF() + +ExternalProject_Add( + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${MKLDNN_DEPENDS} + GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" + GIT_TAG "v0.9" + PREFIX ${MKLDNN_SOURCES_DIR} + CONFIGURE_COMMAND mkdir -p <SOURCE_DIR>/build + BUILD_COMMAND cd <SOURCE_DIR>/build + && cmake .. -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT} + && $(MAKE) + INSTALL_COMMAND cd <SOURCE_DIR>/build && $(MAKE) install + UPDATE_COMMAND "" +) + +ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}") +LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake new file mode 100644 index 0000000000000000000000000000000000000000..3f940756a4abb79aba7d3561db19db8532a0b673 --- /dev/null +++ b/cmake/external/mklml.cmake @@ -0,0 +1,64 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
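# A brief note on the two new external projects: external/mkldnn.cmake above
# builds mkl-dnn v0.9 from source with a nested `cmake .. && make` inside
# <SOURCE_DIR>/build, passing MKLROOT so it picks up MKLML headers and libs
# when CBLAS_PROVIDER is MKLML; and when MKLML supplies libiomp5,
# configure.cmake compiles with -fopenmp and links -liomp5 from
# MKLDNN_IOMP_DIR. The mklml.cmake rules below compile nothing: they fetch a
# prebuilt tarball with wget, unpack it, and generate a tiny CMakeLists.txt
# whose only job is to install() the extracted directory into
# MKLML_INSTALL_ROOT.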
+ +IF(NOT ${WITH_MKLML}) + return() +ENDIF(NOT ${WITH_MKLML}) + +INCLUDE(ExternalProject) + +SET(MKLML_PROJECT "extern_mklml") +SET(MKLML_VER "mklml_lnx_2018.0.20170425") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") +SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") +SET(MKLML_DST_DIR "opt/paddle/third_party/mklml") +SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") +IF(NOT "$ENV{HOME}" STREQUAL "/root") + SET(MKLML_INSTALL_ROOT "$ENV{HOME}") +ENDIF() + +SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) +SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) +SET(MKLML_INC_DIR ${MKLML_ROOT}/include) +SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib) +SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) +SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") + +INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) + +SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt) +FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}\n" + " DESTINATION ${MKLML_DST_DIR})\n") + +ExternalProject_Add( + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${MKLML_SOURCE_DIR} + DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} + && tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} +) + +ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) +ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +LIST(APPEND external_project_dependencies mklml) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index c31e62fc08b531a38a851b71a033e14277eff015..34fd348893058980964d723490d9cc220a157b5a 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -124,6 +124,7 @@ set(GPU_COMMON_FLAGS -Wno-error=literal-suffix -Wno-error=unused-local-typedefs -Wno-error=unused-function # Warnings in Numpy Header. + -Wno-error=array-bounds # Warnings in Eigen::array ) if (APPLE) diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index 9eaf8c04ae01fe7eebc92c51803bfcf977995ee3..287da694915ca383dc29e6d33201dc701cb7de87 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package main import ( diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index 652d7ba315d72ff19931b82a4b0d1c30b2ff8f37..20094fbab4d12a3feca7e3e2d8c064300ac877b1 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package main import ( diff --git a/go/connection/conn.go b/go/connection/conn.go index 977e8cc123707dbcf055bb77399adbc232c575a0..ffa8db689da307277f0943a0a71c7ace5ab21887 100644 --- a/go/connection/conn.go +++ b/go/connection/conn.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package connection import ( diff --git a/go/master/CMakeLists.txt b/go/master/CMakeLists.txt index 30531e6469297be1624ea590ea71b1c996b58ed4..93efa4eaf7da8d502a17ec617823d08195c5e9ee 100644 --- a/go/master/CMakeLists.txt +++ b/go/master/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# if(WITH_TESTING) go_test(master_test) endif() diff --git a/go/master/c/CMakeLists.txt b/go/master/c/CMakeLists.txt index d900850be04e3f385cc7fbf341ef0bb9fe53e789..082d9f3f597db14d0731e0292d3b66d92a49d6c1 100644 --- a/go/master/c/CMakeLists.txt +++ b/go/master/c/CMakeLists.txt @@ -1 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# go_library(paddle_master SHARED DEPS paddle_go_optimizer) diff --git a/go/master/c/client.go b/go/master/c/client.go index 2cbe164c7b406b189f44ec850796203f24779205..9f5733075f440c7d440f99f6117998135e715c36 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package main /* diff --git a/go/master/client.go b/go/master/client.go index 90b99470978d21480eb2d8097e7dec217b9524eb..7f33090dc714e3f181d13362505b7bd3ebb1cc90 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import ( diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index 70dc09bf9461142ff6498355a5858ba9a1320510..ee305e2c80f54ebee2e5011ca7ff0cf5e0612f41 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import ( diff --git a/go/master/client_test.go b/go/master/client_test.go index bc92dc5ac973d62434b71e09705143ac8fbbd2fa..a90062c753bf27467c722b5f1dd5f1f17990df2f 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package master_test import ( diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go index 69dc6a8268748ad9a72eb10fc2789982f565d291..607e726251fe4a4487ed7ea1eb8343d3ed9587fe 100644 --- a/go/master/etcd_client.go +++ b/go/master/etcd_client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import ( diff --git a/go/master/inmem_store.go b/go/master/inmem_store.go index 57e75dc4e01b4bafa8153bcc7fbc82a9eb2b08f5..ffd663f7f0b25c29f0bab082d27b29dcfeb60826 100644 --- a/go/master/inmem_store.go +++ b/go/master/inmem_store.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import "sync" diff --git a/go/master/service.go b/go/master/service.go index 262735f421ad7ae04050e9264a177ee4c46e68d0..2766720c28fb6de258162bc35f014abc1b3b74b1 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import ( diff --git a/go/master/service_internal_test.go b/go/master/service_internal_test.go index 9c0d1d0a39fc8cb2b29fd0e3a4ba0c9b255f80fb..69a882fc33668a8cdefa30ae394f6c605f3bf099 100644 --- a/go/master/service_internal_test.go +++ b/go/master/service_internal_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package master import "testing" diff --git a/go/pserver/CMakeLists.txt b/go/pserver/CMakeLists.txt index 6267040a6eb421ef5006a83625cf24a8124f5320..4fe0a8cb021e8dbf443c8f33bfb046e228a2fd8d 100644 --- a/go/pserver/CMakeLists.txt +++ b/go/pserver/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# if(WITH_TESTING) go_test(pserver_test DEPS paddle_go_optimizer) endif() diff --git a/go/pserver/client/CMakeLists.txt b/go/pserver/client/CMakeLists.txt index 0052bb460bbe3a8fc1e898cac8c3d42caec098a7..e295611060043a77bb1f19fc7053beddc9fbc327 100644 --- a/go/pserver/client/CMakeLists.txt +++ b/go/pserver/client/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# if(WITH_TESTING) go_test(pserver_client_test DEPS paddle_go_optimizer) endif() diff --git a/go/pserver/client/c/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt index c6333eab550c9a2b71bcaf20b69b2bc0a9b9c529..a932791c7cb003dc812de4eed923e7c10b25c363 100644 --- a/go/pserver/client/c/CMakeLists.txt +++ b/go/pserver/client/c/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) target_link_libraries(paddle_go_optimizer stdc++ m) diff --git a/go/pserver/client/c/cclient.go b/go/pserver/client/c/cclient.go index 718b4304c80791b4d8a8816f256c8fa93e0b1ead..24cd922ffe85f0a0c5b68cb6bb87c38a4962f292 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package main /* diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt index dce8645ce753f6a14b298726c714be18de3834e4..3724ccb60b72bf446058c26c7ad3ee4d23f1ccce 100644 --- a/go/pserver/client/c/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -1,2 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) add_style_check_target(test_cclient test_cclient.c) diff --git a/go/pserver/client/c/test/test_cclient.c b/go/pserver/client/c/test/test_cclient.c index 8eababbe33914d25f1eb91b991e11eaacd2e4716..f9b99674340ddc2608ef68011f983190f4c64362 100644 --- a/go/pserver/client/c/test/test_cclient.c +++ b/go/pserver/client/c/test/test_cclient.c @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include <stdio.h> #include <stdlib.h> diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index b4a45e1c21056550ef9264746bcf58a8abb369a1..ddb749d6294707e77bfd1881ec3349b547240c86 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package client import ( diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index 5c89882a297323034be2875a6d4cb71d715eb0c2..b630d434dca283df67f5b850b35057870fe27529 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package client_test import ( diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 953065b427ed52d39f1253ea94d485b765ea5dc2..b6ff1fec8a6f37f61f38cb5d004b1d2c886473ed 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package client import ( @@ -66,10 +80,10 @@ func (p *EtcdClient) List() []Server { for { for i := 0; i < psDesired; i++ { ctx, cancel := context.WithTimeout(context.Background(), p.timeout) - cancel() psKey := pserver.PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) resp, err := p.client.Get(ctx, psKey) + cancel() if err != nil { log.Infof("Get psKey= %s error, %v", psKey, err) time.Sleep(p.timeout) diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go index e70e826975b26db302a6799e9171cff970153aac..98ff8ce827c7cfcd9122cb043f2a6226057cc95a 100644 --- a/go/pserver/etcd_client.go +++ b/go/pserver/etcd_client.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
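// Note on the List() fix in go/pserver/client/etcd_client.go above: the old
// code called cancel() immediately after context.WithTimeout, so p.client.Get
// ran against an already-cancelled context and could fail spuriously; moving
// cancel() to after the Get gives the RPC its full timeout. A minimal sketch
// of the corrected pattern (illustrative; cli, timeout, and key are assumed
// names, with cli a *clientv3.Client):
//
//	ctx, cancel := context.WithTimeout(context.Background(), timeout)
//	resp, err := cli.Get(ctx, key)
//	cancel() // release the context's timer once the call has returned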
+ package pserver import ( diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 151a3f80332b0e62767586f9f769c839ba19ce1e..709160d45d98b6cf6d60f52ceb3fb33e0a0bd17d 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package pserver // #cgo CFLAGS: -I ../../ diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index d19e9de92e0b33b1d9619adb615a24884097a38f..d001e6993e6aed2f5829c1b86928af30f4900c8a 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package pserver import ( diff --git a/go/pserver/service.go b/go/pserver/service.go index c723959d6b87524762e2f874bb5e4d5bd567cd00..46738413f0a28217dac484a1a219a108790e4921 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package pserver import ( diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index a191f689fea9b5e64204c3ddfd12edf92f5ddb09..988f3b5acb82a95aeb54af2b8b0e4d39a458291a 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package pserver_test import ( diff --git a/go/utils/networkhelper/CMakeLists.txt b/go/utils/networkhelper/CMakeLists.txt index db6cf211d8c0b124856ca5c5fd2c49763b1b4a64..9233264ff3c5eadaae2c432066281fb721e38773 100644 --- a/go/utils/networkhelper/CMakeLists.txt +++ b/go/utils/networkhelper/CMakeLists.txt @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# if(WITH_TESTING) go_test(network_helper_test) endif() diff --git a/go/utils/networkhelper/helper.go b/go/utils/networkhelper/helper.go index fbeaea8f5e7d93309befbd23063e474a4c6df46e..c3fc747bdaf54c34d6d9841343d4b21f784e9a7b 100644 --- a/go/utils/networkhelper/helper.go +++ b/go/utils/networkhelper/helper.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package networkhelper import ( diff --git a/go/utils/networkhelper/helper_test.go b/go/utils/networkhelper/helper_test.go index 4208f9e358fc4345b73a2b8a9211b8889c1190d8..0bc02ad42a9aad283957fd819b14f882359c25a7 100644 --- a/go/utils/networkhelper/helper_test.go +++ b/go/utils/networkhelper/helper_test.go @@ -1,3 +1,17 @@ +// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package networkhelper import "testing" diff --git a/paddle/framework/eigen.h b/paddle/framework/eigen.h index 4ba4fd4d110330805faf2468bd406cb23c6f1b1c..2599b2950836acd44102265dff8bb903f5c8b371 100644 --- a/paddle/framework/eigen.h +++ b/paddle/framework/eigen.h @@ -65,14 +65,15 @@ template <typename T, int MajorType = Eigen::RowMajor, typename IndexType = Eigen::DenseIndex> struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> { // Flatten is to reshape a Tensor into a one dimension EigenVector - static typename EigenTensor<T, 1>::Type Flatten(Tensor& tensor) { - return EigenTensor<T, 1>::From( - tensor, make_ddim({static_cast<int>(product(tensor.dims_))})); + using Parent = EigenTensor<T, 1, MajorType, IndexType>; + static typename Parent::Type Flatten(Tensor& tensor) { + return Parent::From(tensor, + make_ddim({static_cast<int>(product(tensor.dims_))})); } - static typename EigenTensor<T, 1>::ConstType Flatten(const Tensor& tensor) { - return EigenTensor<T, 1>::From( - tensor, make_ddim({static_cast<int>(product(tensor.dims_))})); + static typename Parent::ConstType Flatten(const Tensor& tensor) { + return Parent::From(tensor, + make_ddim({static_cast<int>(product(tensor.dims_))})); } }; diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 93c6fad5d3d9f3de100d30161e6e438eb43816a2..a36f375d2e42ee3c46ddef42954335cba7eb88f2 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -48,25 +48,27 @@ class Tensor { template <typename T> const T* data() const { - CheckDims<T>(); + EnforceSufficientMemory<T>(); return reinterpret_cast<const T*>( reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_); } template <typename T> T* data() { - CheckDims<T>(); + EnforceSufficientMemory<T>(); return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_); } - template <typename T> + template <typename T, // must be POD types + typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> T* mutable_data(DDim dims, platform::Place place) { - set_dims(dims); + Resize(dims); return mutable_data<T>(place); } - template <typename T> + template <typename T, // must be POD types + typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> T* mutable_data(platform::Place place) { PADDLE_ENFORCE(product(dims_) > 0, "Tensor's numel must be larger than zero to call " @@ -95,11 +97,9 @@ class Tensor { } template <typename T> - void ShareDataFrom(const Tensor& src) { - src.CheckDims<T>(); - holder_ = src.holder_; - set_dims(src.dims()); - offset_ = src.offset_; + void ShareDataWith(const Tensor& src) { + src.EnforceSufficientMemory<T>(); + *this = src; } template <typename T> @@ -107,9 +107,9 @@ class Tensor { PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && platform::is_cpu_place(dst_place), "Tensor::CopyFrom only support CPU now."); - src.CheckDims<T>(); + src.EnforceSufficientMemory<T>(); size_t size = product(src.dims_) * sizeof(T); - set_dims(src.dims()); + Resize(src.dims()); const void* src_ptr = static_cast<const void*>(src.data<T>()); void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place)); memcpy(dst_ptr, src_ptr, size); @@ -117,34 +117,25 @@ class Tensor { template <typename T> Tensor Slice(const int& begin_idx, const int& end_idx) const { - CheckDims<T>(); - PADDLE_ENFORCE(begin_idx >= 0 && end_idx <= dims_[0], - "Slice index is less than zero or out of bound."); + EnforceSufficientMemory<T>(); + PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); + PADDLE_ENFORCE(end_idx <= dims_[0], 
"Slice end index is out of bound."); PADDLE_ENFORCE(begin_idx < end_idx, "Begin index must be less than end index."); PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); - std::vector<int> d = vectorize(dims_); - int base = 1; - for (size_t i = 1; i < d.size(); ++i) { - base *= d[i]; - } + int base = product(dims_) / dims_[0]; Tensor dst; dst.holder_ = holder_; DDim dst_dims = dims_; dst_dims[0] = end_idx - begin_idx; - dst.set_dims(dst_dims); + dst.Resize(dst_dims); dst.offset_ = offset_ + begin_idx * base * sizeof(T); return dst; } - void set_dims(const DDim& dims) { - if (dims == dims_) { - return; - } - dims_ = dims; - } + void Resize(const DDim& dims) { dims_ = dims; } - DDim dims() const { return dims_; } + const DDim& dims() const { return dims_; } private: // Placeholder hides type T, so it doesn't appear as a template @@ -159,21 +150,9 @@ class Tensor { template <typename T, typename PlaceType> struct PlaceholderImpl : public Placeholder { - private: - template <typename PType> - class Deleter { - public: - Deleter(PType place) : place_(place) {} - void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); } - - private: - PType place_; - }; - - public: PlaceholderImpl(PlaceType place, size_t size) : ptr_(static_cast<T*>(memory::Alloc(place, size)), - Deleter<PlaceType>(place)), + memory::PODDeleter<T, PlaceType>(place)), place_(place), size_(size) {} @@ -182,13 +161,13 @@ class Tensor { virtual paddle::platform::Place place() const { return place_; } virtual std::type_index type() const { return std::type_index(typeid(T)); } - std::unique_ptr<T, Deleter<PlaceType>> ptr_; + std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_; platform::Place place_; // record the place of ptr_. size_t size_; // size of the memory block. }; template <typename T> - inline void CheckDims() const { + inline void EnforceSufficientMemory() const { PADDLE_ENFORCE(holder_ != nullptr, "Tenosr holds no memory. Call Tensor::mutable_data first."); PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, @@ -198,7 +177,11 @@ class Tensor { std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated. DDim dims_; - size_t offset_; // marks the begin of tensor data area. + // A PlaceHolder may be shared by more than one tensor. Some of them may be + // slices of the others. So the offset_ is introduced here to indicate the + // byte offset between PlaceHolder::ptr_ and where tensor's data really + // begins. 
+ size_t offset_; }; } // namespace framework diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 8a7cbbd0de6fd6aaafa8649abb8628e971bc49c1..089844dc0164dae8067846a8e6846d47fb1b0833 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -19,7 +19,7 @@ TEST(Tensor, Dims) { using namespace paddle::framework; using namespace paddle::platform; Tensor tt; - tt.set_dims(make_ddim({2, 3, 4})); + tt.Resize(make_ddim({2, 3, 4})); DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { @@ -97,7 +97,7 @@ TEST(Tensor, MutableData) { #endif } -TEST(Tensor, ShareDataFrom) { +TEST(Tensor, ShareDataWith) { using namespace paddle::framework; using namespace paddle::platform; { @@ -106,7 +106,7 @@ TEST(Tensor, ShareDataFrom) { // Try to share data form uninitialized tensor bool caught = false; try { - dst_tensor.ShareDataFrom<float>(src_tensor); + dst_tensor.ShareDataWith<float>(src_tensor); } catch (std::runtime_error& err) { caught = true; std::string msg = @@ -119,7 +119,7 @@ TEST(Tensor, ShareDataFrom) { ASSERT_TRUE(caught); src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), CPUPlace()); - dst_tensor.ShareDataFrom<int>(src_tensor); + dst_tensor.ShareDataWith<int>(src_tensor); ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>()); } @@ -128,7 +128,7 @@ TEST(Tensor, ShareDataFrom) { Tensor src_tensor; Tensor dst_tensor; src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), GPUPlace()); - dst_tensor.ShareDataFrom<int>(src_tensor); + dst_tensor.ShareDataWith<int>(src_tensor); ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>()); } #endif diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp index dfa2f784610b0dd60340e0ebc6a066437f3715eb..7f32c734791853a8cd0287a80a7955dbd1bd7571 100644 --- a/paddle/function/ConvOpTest.cpp +++ b/paddle/function/ConvOpTest.cpp @@ -31,13 +31,22 @@ public: ConvolutionTest(const std::string& conv1, const std::string& conv2, TestType type, + bool useGroups = true, std::string algo = "auto") { for (size_t batchSize : {1, 32}) { for (size_t inputSize : {7, 14, 54}) { for (size_t filterSize : {1, 3, 5}) { for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64, 128}) { - if (inputChannels < outputChannels) break; + for (size_t outputChannels : {3, 64}) { + if (inputChannels > outputChannels) break; + size_t groups; + if (!useGroups) { + groups = 1; + } else { + if (outputChannels % inputChannels != 0) continue; + groups = inputChannels; + } + for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { if (padding >= filterSize) break; @@ -62,13 +71,24 @@ public: FuncConfig() .set("paddings", paddings) .set("strides", strides) - .set("groups", (size_t)1) + .set("groups", groups) .set("algo", algo)); TensorShape input{ batchSize, inputChannels, inputSize, inputSize}; - TensorShape filter{ - outputChannels, inputChannels, filterSize, filterSize}; + + TensorShape filter; + if (groups > 1) + filter = TensorShape({groups, + outputChannels / groups, + inputChannels / groups, + filterSize, + filterSize}); + else + filter = TensorShape({outputChannels, + inputChannels, + filterSize, + filterSize}); TensorShape output{ batchSize, outputChannels, outputSize, outputSize}; @@ -85,7 +105,8 @@ public: } else if (type == kBackwardFilterTest) { test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), + 
ADD_TO); test.run(); } } @@ -106,6 +127,7 @@ public: ConvolutionTest2(const std::string& conv1, const std::string& conv2, TestType type, + bool useGroups = true, std::string algo = "auto") { for (size_t batchSize : {16}) { for (size_t inputHeight : {7, 31}) { @@ -113,7 +135,15 @@ public: for (size_t filterHeight : {1, 5}) { for (size_t filterWidth : {3, 7}) { for (size_t inputChannels : {7}) { - for (size_t outputChannels : {32}) { + for (size_t outputChannels : {7}) { + size_t groups; + if (!useGroups) { + groups = 1; + } else { + if (outputChannels % inputChannels != 0) continue; + groups = inputChannels; + } + size_t stride = 1; size_t padding = 0; size_t outputHeight = @@ -141,13 +171,24 @@ public: FuncConfig() .set("paddings", paddings) .set("strides", strides) - .set("groups", (size_t)1) + .set("groups", groups) .set("algo", algo)); TensorShape input{ batchSize, inputChannels, inputHeight, inputWidth}; - TensorShape filter{ - outputChannels, inputChannels, filterHeight, filterWidth}; + + TensorShape filter; + if (groups > 1) + filter = TensorShape({groups, + outputChannels / groups, + inputChannels / groups, + filterHeight, + filterWidth}); + else + filter = TensorShape({outputChannels, + inputChannels, + filterHeight, + filterWidth}); TensorShape output{ batchSize, outputChannels, outputHeight, outputWidth}; @@ -164,7 +205,8 @@ public: } else if (type == kBackwardFilterTest) { test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), + ADD_TO); test.run(); } } @@ -177,34 +219,88 @@ public: } }; +// ======Start Convolution TEST====== + TEST(Forward, GEMM) { ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest); + "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest); + "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); } #ifndef PADDLE_ONLY_CPU TEST(Forward, GEMM2) { ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest); + "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest); + "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); } TEST(BackwardInput, GEMM) { ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( - "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); + "GemmConvGradInput-CPU", + "GemmConvGradInput-GPU", + kBackwardInputTest, + false); ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( - "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); + "GemmConvGradInput-CPU", + "GemmConvGradInput-GPU", + kBackwardInputTest, + false); } TEST(BackwardFilter, GEMM) { ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( - "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); + "GemmConvGradFilter-CPU", + "GemmConvGradFilter-GPU", + kBackwardFilterTest, + false); ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( - "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); + "GemmConvGradFilter-CPU", + "GemmConvGradFilter-GPU", + kBackwardFilterTest, + false); } #endif +// ======End Convolution TEST====== + +// ======Start DepthwiseConvolution TEST====== + +// TODO(zhaolong) The depthwise convolution cpu test will be added 
when the cpu +// version of depthwiseConv is implemented. + +#ifndef PADDLE_ONLY_CPU + +TEST(DepthwiseConvForward, GEMM2) { + ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( + "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); + ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( + "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); +} + +TEST(DepthwiseConvBackwardInput, GEMM) { + ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( + "GemmConvGradInput-CPU", + "DepthwiseConvGradInput-GPU", + kBackwardInputTest); + ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( + "GemmConvGradInput-CPU", + "DepthwiseConvGradInput-GPU", + kBackwardInputTest); +} + +TEST(DepthwiseConvBackwardFilter, GEMM) { + ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( + "GemmConvGradFilter-CPU", + "DepthwiseConvGradFilter-GPU", + kBackwardFilterTest); + ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( + "GemmConvGradFilter-CPU", + "DepthwiseConvGradFilter-GPU", + kBackwardFilterTest); +} + +#endif +// ======End DepthwiseConvolution TEST====== } // namespace paddle diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu index 37ce6de0647e5e06a231710b5a53089533de2407..786eb268d45aadee0c1f6fcbbafc23173cf0bc77 100644 --- a/paddle/function/CropOpGpu.cu +++ b/paddle/function/CropOpGpu.cu @@ -36,10 +36,11 @@ __global__ void KeCrop(real* outputs, const real* inputs, template <> void Crop<DEVICE_TYPE_GPU>(real* outputs, const real* inputs, - const TensorShape inShape, - const TensorShape outShape, + const TensorShape inShape, + const TensorShape outShape, const FuncConfig& conf) { - std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner"); + std::vector<uint32_t> crop_corner = + conf.get<std::vector<uint32_t>>("crop_corner"); int cropC = crop_corner[1]; int cropH = crop_corner[2]; int cropW = crop_corner[3]; @@ -74,7 +75,8 @@ __global__ void KeCropDiff(const real* inGrad, real* outGrad, const int c = (idx / inW / inH) % inC; const int n = idx / inW / inH / inC; - const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; + const int off = + ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; outGrad[off] += inGrad[idx]; } @@ -86,7 +88,8 @@ void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad, const TensorShape inShape, const TensorShape outShape, const FuncConfig& conf) { - std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner"); + std::vector<uint32_t> crop_corner = + conf.get<std::vector<uint32_t>>("crop_corner"); int cropC = crop_corner[1]; int cropH = crop_corner[2]; int cropW = crop_corner[3]; diff --git a/paddle/function/DepthwiseConvOp.cpp b/paddle/function/DepthwiseConvOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..490e8d546cbd460217abe95f6291b13fa207faa9 --- /dev/null +++ b/paddle/function/DepthwiseConvOp.cpp @@ -0,0 +1,306 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "DepthwiseConvOp.h" +#include "ConvOp.h" +#include "GemmFunctor.h" + +namespace paddle { + +template <class T> +class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> { +public: + void operator()(const T* inputData, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* outputData) { + // TODO(zhaolong) : cpu implementation of depthwise convolution + } +}; + +template <class T> +class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> { +public: + void operator()(const T* outputGrad, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* inputGrad) {} + // TODO(zhaolong) : cpu implementation of depthwise convolution +}; + +template <class T> +class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> { +public: + void operator()(const T* outputGrad, + const T* inputData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* colData, + T* filterGrad) {} + // TODO(zhaolong) : cpu implementation of depthwise convolution +}; + +/* + * \brief Forward calculation of depthwise convolution. + */ +template <DeviceType Device> +class DepthwiseConvFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + check(inputs, outputs); + + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = output[2]; + size_t outputWidth = output[3]; + size_t filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + real* inputData = inputs[0].data<real>(); + real* filterData = inputs[1].data<real>(); + real* outputData = outputs[0].data<real>(); + + DepthwiseConvFunctor<Device, real> depthwiseConv; + depthwiseConv(inputData, + filterData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH(), + strideW(), + paddingH(), + paddingW(), + outputData); + } +}; + +/* + * \brief Backward input calculation of depthwise convolution. 
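+ *
+ * Note: calc() below requires outputs[0] to use the ADD_TO argument type;
+ * the computed gradient is accumulated into inputGrad instead of
+ * overwriting it.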
+ */ +template <DeviceType Device> +class DepthwiseConvGradInputFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& output = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& input = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + check(inputs, outputs); + const TensorShape& output = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& input = outputs[0].shape(); + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = output[2]; + size_t outputWidth = output[3]; + size_t filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + real* outputGrad = inputs[0].data<real>(); + real* filterData = inputs[1].data<real>(); + real* inputGrad = outputs[0].data<real>(); + + DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput; + depthwiseConvGradInput(outputGrad, + filterData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH(), + strideW(), + paddingH(), + paddingW(), + inputGrad); + } +}; + +/* + * \brief Backward filter calculation of depthwise convolution.
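+ *
+ * Note: the per-sample products of outputGrad and inputData are first
+ * expanded into the colData buffer (of outputChannels * filterHeight *
+ * filterWidth * outputHeight * outputWidth elements) and then reduced
+ * into filterGrad, so the filter gradient is accumulated over the batch.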
+ */ +template <DeviceType Device> +class DepthwiseConvGradFilterFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& output = inputs[0].shape(); + const TensorShape& input = inputs[1].shape(); + const TensorShape& filter = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + check(inputs, outputs); + const TensorShape& output = inputs[0].shape(); + const TensorShape& input = inputs[1].shape(); + const TensorShape& filter = outputs[0].shape(); + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = output[2]; + size_t outputWidth = output[3]; + size_t filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + real* outputGrad = inputs[0].data<real>(); + real* inputData = inputs[1].data<real>(); + real* filterGrad = outputs[0].data<real>(); + + int size = outputChannels * filterHeight * filterWidth * outputHeight * + outputWidth; + resizeBuffer<Device>(size); + real* colData = reinterpret_cast<real*>(memory_->getBuf()); + + DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter; + + depthwiseConvGradFilter(outputGrad, + inputData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH(), + strideW(), + paddingH(), + paddingW(), + colData, + filterGrad); + } +}; + +REGISTER_TYPED_FUNC(DepthwiseConv, CPU, DepthwiseConvFunction); +REGISTER_TYPED_FUNC(DepthwiseConvGradInput, + CPU, + DepthwiseConvGradInputFunction); +REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, + CPU, + DepthwiseConvGradFilterFunction); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction); +REGISTER_TYPED_FUNC(DepthwiseConvGradInput, + GPU, + DepthwiseConvGradInputFunction); +REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, + GPU, + DepthwiseConvGradFilterFunction); +#endif + +} // namespace paddle diff --git a/paddle/function/DepthwiseConvOp.h b/paddle/function/DepthwiseConvOp.h new file mode 100644 index 0000000000000000000000000000000000000000..1bf70e52f34626405b49571e023ac60926713eef --- /dev/null +++ b/paddle/function/DepthwiseConvOp.h @@ -0,0 +1,159 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "TensorType.h" + +namespace paddle { + +/** + *\brief Depthwise convolution forward. 
The outputData + * of depthwise convolution is the same as that of ExpandConvLayer + * when groups equals inputChannels in ExpandConvLayer. + * + * \param[in] inputData input data. + * \param[in] filterData the parameters of the depthwise conv layer. + * \param[in] batchSize batch size of input data. + * \param[in] outputChannels channels of outputData. + * \param[in] outputHeight height of outputData. + * \param[in] outputWidth width of outputData. + * \param[in] inputChannels channels of inputData. + * \param[in] inputHeight height of inputData. + * \param[in] inputWidth width of inputData. + * \param[in] filterMultiplier equals outputChannels/groups_. + * \param[in] filterHeight height of filter. + * \param[in] filterWidth width of filter. + * \param[in] strideH stride size in height direction. + * \param[in] strideW stride size in width direction. + * \param[in] paddingH padding size in height direction. + * \param[in] paddingW padding size in width direction. + * \param[out] outputData output data. + * + */ +template <DeviceType Device, class T> +class DepthwiseConvFunctor { +public: + void operator()(const T* inputData, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* outputData); +}; + +/** + *\brief Functor to compute the depthwise convolution backprop w.r.t input. + * + * \param[in] outputGrad the grad data of output. + * \param[in] filterData the parameters of the depthwise conv layer. + * \param[in] batchSize batch size of input data. + * \param[in] outputChannels channels of outputData. + * \param[in] outputHeight height of outputData. + * \param[in] outputWidth width of outputData. + * \param[in] inputChannels channels of input data. + * \param[in] inputHeight height of inputData. + * \param[in] inputWidth width of inputData. + * \param[in] filterMultiplier equals outputChannels/groups_. + * \param[in] filterHeight height of filter. + * \param[in] filterWidth width of filter. + * \param[in] strideH stride size in height direction. + * \param[in] strideW stride size in width direction. + * \param[in] paddingH padding size in height direction. + * \param[in] paddingW padding size in width direction. + * \param[out] inputGrad the grad data of input. + * + */ +template <DeviceType Device, class T> +class DepthwiseConvGradInputFunctor { +public: + void operator()(const T* outputGrad, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* inputGrad); +}; + +/** + *\brief Functor to compute the depthwise convolution backprop w.r.t filter. + * + * \param[in] outputGrad the grad data of output. + * \param[in] inputData input data. + * \param[in] batchSize batch size of input data. + * \param[in] outputChannels channels of outputData. + * \param[in] outputHeight height of outputData. + * \param[in] outputWidth width of outputData. + * \param[in] inputChannels channels of input data. + * \param[in] inputHeight height of inputData. + * \param[in] inputWidth width of inputData. + * \param[in] filterMultiplier equals outputChannels/groups_. + * \param[in] filterHeight height of filter.
+ * \param[in] filterWidth width of filter. + * \param[in] strideH stride size in height direction. + * \param[in] strideW stride size in width direction. + * \param[in] paddingH padding size in height direction. + * \param[in] paddingW padding size in width direction. + * \param[in] colData Auxiliary data when calculating filterGrad. + * \param[out] filterGrad the grad data of filter. + * + */ +template <DeviceType Device, class T> +class DepthwiseConvGradFilterFunctor { +public: + void operator()(const T* outputGrad, + const T* inputData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* colData, + T* filterGrad); +}; + +} // namespace paddle diff --git a/paddle/function/DepthwiseConvOpGpu.cu b/paddle/function/DepthwiseConvOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..ede0d27aa82e7d71ff5bc33df110fec260e06463 --- /dev/null +++ b/paddle/function/DepthwiseConvOpGpu.cu @@ -0,0 +1,342 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#include "DepthwiseConvOp.h" +#include "GemmFunctor.h" +#include "paddle/math/BaseMatrix.h" + +namespace paddle { + +// CUDA kernel to compute the depthwise convolution forward pass +template <class T> +__global__ +void ConvolutionDepthwiseForward(const int nthreads, + const T* const inputData, const T* const filterData, + const int batchSize, const int outputChannels, const int outputHeight, + const int outputWidth, const int inputChannels, const int inputHeight, + const int inputWidth, const int filterMultiplier, const int filterHeight, + const int filterWidth, const int strideH, const int strideW, + const int paddingH, const int paddingW, T* const outputData) { + + int index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + + if (index < nthreads) { + const int batch = index / outputChannels / outputHeight / outputWidth; + const int c_out = (index / outputHeight / outputWidth) % outputChannels; + const int h_out = (index / outputWidth) % outputHeight; + const int w_out = index % outputWidth; + + const int c_in = c_out / filterMultiplier; + const T* weight = filterData + c_out * filterHeight * filterWidth; + T value = 0; + const int h_in_start = -paddingH + h_out * strideH; + const int w_in_start = -paddingW + w_out * strideW; + const int h_in_end = -paddingH + h_out * strideH + filterHeight - 1; + const int w_in_end = -paddingW + w_out * strideW + filterWidth - 1; + if ((h_in_start >= 0) && (h_in_end < inputHeight) + && (w_in_start >= 0) && (w_in_end < inputWidth)) { + for (int kh = 0; kh < filterHeight; ++kh) { + for (int kw = 0; kw < filterWidth; ++kw) { + const int h_in = -paddingH + h_out * strideH + kh; + const int w_in = -paddingW + w_out * strideW + kw; + const int offset = ((batch * inputChannels + c_in) + * inputHeight + h_in) * inputWidth + w_in; + value += (*weight) * inputData[offset]; + ++weight; + } + } + } else { + for (int kh = 0; kh < filterHeight; ++kh) { + for (int kw = 0; kw < filterWidth; ++kw) { + const int h_in = -paddingH + h_out * strideH + kh; + const int w_in = -paddingW + w_out * strideW + kw; + if ((h_in >= 0) && (h_in < inputHeight) + && (w_in >= 0) && (w_in < inputWidth)) { + const int offset = ((batch * inputChannels + c_in) + * inputHeight + h_in) * inputWidth + w_in; + value += (*weight) * inputData[offset]; + } + ++weight; + } + } + } + outputData[index] = value; + } +} + +// CUDA kernel to compute the depthwise convolution backprop w.r.t input. +template <class T> +__global__ +void ConvolutionDepthwiseInputBackward(const int nthreads, + const T* const top_diff, const T* const weight_data, + const int num, const int outputChannels, const int outputHeight, + const int outputWidth, const int inputChannels, const int inputHeight, + const int inputWidth, const int filterMultiplier, const int filterHeight, + const int filterWidth, const int strideH, const int strideW, + const int paddingH, const int paddingW, T* const bottom_diff) { + int index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + if (index < nthreads) { + const int batch = index / inputChannels / inputHeight / inputWidth; + const int c_in = (index / inputHeight / inputWidth) % inputChannels; + const int h_in = (index / inputWidth) % inputHeight; + const int w_in = index % inputWidth; + + const int c_out_start = c_in * filterMultiplier; + + int h_out_start = (h_in - filterHeight + paddingH + strideH)/strideH; + h_out_start = 0 > h_out_start ? 
0 : h_out_start; + int h_out_end = (h_in + paddingH)/strideH; + h_out_end = outputHeight - 1 < h_out_end? outputHeight - 1 : h_out_end; + int w_out_start = (w_in - filterWidth + paddingW + strideW)/strideW; + w_out_start = 0 > w_out_start ? 0 : w_out_start; + int w_out_end = (w_in + paddingW)/strideW; + w_out_end = outputWidth - 1 < w_out_end? outputWidth - 1 : w_out_end; + + T value = 0; + + for (int c_out = c_out_start; + c_out < c_out_start + filterMultiplier; c_out ++) { + for (int h_out = h_out_start; h_out <= h_out_end; ++h_out) { + const int filter_h = h_in + paddingH - h_out * strideH; + for (int w_out = w_out_start; w_out <= w_out_end; ++w_out) { + const int filter_w = w_in + paddingW - w_out * strideW; + const int filter_offset = c_out * filterHeight * filterWidth + + filter_h * filterWidth + filter_w; + const int top_diff_offset = ((batch * outputChannels + c_out) * + outputHeight + h_out)* outputWidth + w_out; + value += top_diff[top_diff_offset] * weight_data[filter_offset]; + } + } + } + bottom_diff[index] += value; + } +} + +// CUDA kernel to compute the depthwise convolution backprop w.r.t filter. +template <class T> +__global__ +void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads, + const T* const top_diff, const T* const inputData, + const int num, const int outputChannels, const int outputHeight, + const int outputWidth, const int inputChannels, const int inputHeight, + const int inputWidth, const int filterMultiplier, const int filterHeight, + const int filterWidth, const int strideH, const int strideW, + const int paddingH, const int paddingW, T* const buffer_data) { + int index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + if (index < nthreads) { + const int h_out = (index / outputWidth) % outputHeight; + const int w_out = index % outputWidth; + const int kh = (index / filterWidth / outputHeight / outputWidth) + % filterHeight; + const int kw = (index / outputHeight / outputWidth) % filterWidth; + const int h_in = -paddingH + h_out * strideH + kh; + const int w_in = -paddingW + w_out * strideW + kw; + if ((h_in >= 0) && (h_in < inputHeight) + && (w_in >= 0) && (w_in < inputWidth)) { + const int c_out = index / + (filterHeight * filterWidth * outputHeight * outputWidth); + const int c_in = c_out / filterMultiplier; + const int batch = num_i; + const int top_offset = ((batch * outputChannels + c_out) * + outputHeight + h_out) * outputWidth + w_out; + const int bottom_offset = ((batch * inputChannels + c_in) + * inputHeight + h_in) * inputWidth + w_in; + buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset]; + } else { + buffer_data[index] = 0; + } + } +} + +template <class T> +class DepthwiseConvFunctor<DEVICE_TYPE_GPU, T>{ +public: + void operator()(const T* inputData, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* outputData){ + int outputSize = batchSize * outputChannels * outputHeight * outputWidth; + + size_t blocks = (outputSize + 1024 -1) / 1024; + size_t blockX = 512; + size_t blockY = (blocks+512-1)/512; + dim3 threads(1024, 1); + dim3 grid(blockX, blockY); + + ConvolutionDepthwiseForward<T> + <<< grid, threads, 0, STREAM_DEFAULT >>>( + outputSize, + inputData, + filterData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + 
inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH, + strideW, + paddingH, + paddingW, + outputData); + } +}; + +template <class T> +class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, T>{ +public: + void operator()(const T* outputGrad, + const T* filterData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* inputGrad){ + int inputSize = batchSize * inputChannels * inputHeight * inputWidth; + + size_t blocks = (inputSize + 1024 -1) / 1024; + size_t blockX = 512; + size_t blockY = (blocks+512-1)/512; + dim3 threads(1024, 1); + dim3 grid(blockX, blockY); + + + ConvolutionDepthwiseInputBackward<T> + // NOLINT_NEXT_LINE(whitespace/operators) + <<< grid, threads, 0, STREAM_DEFAULT >>>( + inputSize, + outputGrad, + filterData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH, + strideW, + paddingH, + paddingW, + inputGrad); + } +}; + +template <class T> +class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, T> { +public: + void operator()(const T* outputGrad, + const T* inputData, + int batchSize, + int outputChannels, + int outputHeight, + int outputWidth, + int inputChannels, + int inputHeight, + int inputWidth, + int filterMultiplier, + int filterHeight, + int filterWidth, + int strideH, + int strideW, + int paddingH, + int paddingW, + T* colData, + T* filterGrad){ + int colDataSize = outputChannels * filterHeight * filterWidth + * outputHeight * outputWidth; + + size_t blocks = (colDataSize + 1024 -1) / 1024; + size_t blockX = 512; + size_t blockY = (blocks+512-1)/512; + dim3 threads(1024, 1); + dim3 grid(blockX, blockY); + BaseMatrix filterGradMatrix(outputChannels * filterHeight * filterWidth, + 1, filterGrad, false, true); + + for (int i = 0; i < batchSize; i++) { + ConvolutionDepthwiseFilterBackward<T> + <<< grid, threads, 0, STREAM_DEFAULT >>>( + i, + colDataSize, + outputGrad, + inputData, + batchSize, + outputChannels, + outputHeight, + outputWidth, + inputChannels, + inputHeight, + inputWidth, + filterMultiplier, + filterHeight, + filterWidth, + strideH, + strideW, + paddingH, + paddingW, + colData); + int K = outputHeight * outputWidth; + int M = colDataSize / K; + + BaseMatrix colMatrix(M, K, colData, false, true); + filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0); + } + } +}; + +#ifdef PADDLE_TYPE_DOUBLE +template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, double>; +template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, double>; +template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, double>; +#else +template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, float>; +template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, float>; +template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, float>; +#endif + +} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index af79e65a7c09e5a1b55febf1df1e8f5bb61bdcb8..783e02e47cb91e28eb88b079f1e94439d34fa775 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -38,10 +38,25 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, inputShape_.resize(numInputs); filterShape_.resize(numInputs); outputShape_.resize(numInputs); + + 
std::string convType; + std::string convGradInputType; + std::string convGradFilterType; + for (int i = 0; i < config_.inputs_size(); i++) { std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; + if (useGpu_ && (size_t)groups_[i] == (size_t)channels_[i] && !isDeconv_) { + convType = "DepthwiseConv"; + convGradInputType = "DepthwiseConvGradInput"; + convGradFilterType = "DepthwiseConvGradFilter"; + } else { + convType = "GemmConv"; + convGradInputType = "GemmConvGradInput"; + convGradFilterType = "GemmConvGradFilter"; + } + if (FLAGS_use_nnpack) { CHECK_EQ(isDeconv_, false); createFunction(forward_, @@ -53,21 +68,21 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, .set("algo", std::string("auto"))); } else { createFunction(forward_, - !isDeconv_ ? "GemmConv" : "GemmConvGradInput", + !isDeconv_ ? convType : convGradInputType, FuncConfig() .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)groups_[i])); createFunction(backward_, - !isDeconv_ ? "GemmConvGradInput" : "GemmConv", + !isDeconv_ ? convGradInputType : convType, FuncConfig() .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)groups_[i])); createFunction(backward_, - "GemmConvGradFilter", + convGradFilterType, FuncConfig() .set("paddings", paddings) .set("strides", strides) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9af083468c0f01218117211f9e4931ca0669e96a..0975c3bc9573c6ccb8f0ac98c41586d322d2465e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -347,6 +347,55 @@ TEST(Layer, CosSimVecMatLayer) { } } +void testDepthwiseConvLayer(const string& type, bool useGpu) { + TestConfig config; + config.biasSize = 32; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(32); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(16); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_img_size_y(8); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), conv->padding(), conv->stride(), /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), conv->filter_size_y(), conv->padding_y(), conv->stride_y(), /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * config.layerConfig.num_filters()); + + testLayerGrad(config, "depthwise_conv", 100, false, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); +} + +TEST(Layer, depthwiseConvLayer) { + // 'depthwise_conv' is a special case of 'exconv' whose + // group count equals the number of input channels.
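+  // Here channels == groups == 16, so each filter group convolves exactly
+  // one input channel (filter_channels == 1), which is the configuration
+  // the DepthwiseConv functions handle.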
+ testDepthwiseConvLayer("exconv", /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testDepthwiseConvLayer("exconv", /* useGpu= */ true); +#endif +} + void testConvLayer(const string& type, bool trans, bool useGpu) { TestConfig config; config.biasSize = 16; diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 7045562dd44f8f3e0be9181b32954c04f0865fa4..c8ba1074a1555bbddde7e5f0fb2a046138b27c09 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -202,7 +202,7 @@ double dotProduct<double>(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } -#ifdef PADDLE_USE_MKL +#if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) template <> void vExp<float>(const int n, const float* a, float* r) { @@ -243,7 +243,55 @@ template <> void vAdd<double>(const int n, const double* a, const double* b, double* r) { vdAdd(n, a, b, r); } +#else + +DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); +template <class T> +void vExp(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>( + binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); +template <class T> +void vLog(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>( + binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); +template <class T> +void vPow(const int n, const T* a, const T b, T* r) { + hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>( + binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); +template <class T> +void vAdd(const int n, const T* a, const T* b, T* r) { + hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(), + const_cast<T*>(a), + const_cast<T*>(b), + r, + 1, + n, + n, + n, + n); +} + +template void vExp(const int n, const float* a, float* r); +template void vExp(const int n, const double* a, double* r); +template void vLog(const int n, const float* a, float* r); +template void vLog(const int n, const double* a, double* r); +template void vPow(const int n, const float* a, const float b, float* r); +template void vPow(const int n, const double* a, const double b, double* r); +template void vAdd(const int n, const float* a, const float* b, float* r); +template void vAdd(const int n, const double* a, const double* b, double* r); +#endif + +#ifdef PADDLE_USE_MKL template <> void vInvSqrt<float>(const int n, const float* a, float* r) { vsInvSqrt(n, a, r); @@ -275,20 +323,6 @@ void vTanh<double>(const int n, const double* a, double* r) { } #else -DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); -template <class T> -void vExp(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>( - binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); -template <class T> -void vLog(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>( - binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n); -} - DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a)); template <class T> void vInvSqrt(const int n, const T* a, T* r) { @@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) { binary::vTanh<T>(), const_cast<T*>(a), r, 1, n, n, n); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); -template <class T> -void vPow(const int n, const T* a, const T b, T* r) { - 
hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>( - binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); -template <class T> -void vAdd(const int n, const T* a, const T* b, T* r) { - hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(), - const_cast<T*>(a), - const_cast<T*>(b), - r, - 1, - n, - n, - n, - n); -} - -template void vExp(const int n, const float* a, float* r); -template void vExp(const int n, const double* a, double* r); -template void vLog(const int n, const float* a, float* r); -template void vLog(const int n, const double* a, double* r); template void vInvSqrt(const int n, const double* a, double* r); template void vInvSqrt(const int n, const float* a, float* r); template void vLog1p(const int n, const float* a, float* r); template void vLog1p(const int n, const double* a, double* r); template void vTanh(const int n, const float* a, float* r); template void vTanh(const int n, const double* a, double* r); -template void vPow(const int n, const float* a, const float b, float* r); -template void vPow(const int n, const double* a, const double b, double* r); -template void vAdd(const int n, const float* a, const float* b, float* r); -template void vAdd(const int n, const double* a, const double* b, double* r); #endif diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 8ada0d34c6733d13a45505492909124010c85a91..637643838ff433753e0cbb9154ee069c2f7c6d15 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -15,6 +15,12 @@ limitations under the License. */ #ifndef MATHFUNCTIONS_H_ #define MATHFUNCTIONS_H_ +#ifdef PADDLE_USE_MKLML +#include <mkl_cblas.h> +#include <mkl_lapacke.h> +#include <mkl_vml_functions.h> +#endif + #ifdef PADDLE_USE_MKL #include <mkl.h> #include <mkl_lapacke.h> diff --git a/paddle/memory/README.md b/paddle/memory/README.md index 96a331a486f57d3e030408fee182199bad5b38c2..7f95e80f980b0c0b93ecb418e6b923045313eaa5 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -1,140 +1,4 @@ -## Design +# Region-based Heterogeneous Memory Management -### Usage - -To allocate 4KB CPU memory: - -```cpp -p = memory::Alloc(platform::CPUPlace(), 4*1024); -``` - -To allocate 4KB memory on the 3rd GPU: - -```cpp -p = memory::Alloc(platform::GPUPlace(2), 4*1024); -``` - -To free memory and check the so-far used amount of memory on a place: - -```cpp -auto pl = platform::GPUPlace(0); -p = memory::Alloc(pl, 4*1024); -cout << memory::Used(pl); -memory::Free(pl, p); -``` - -### API - -In `paddle/memory/memory.h` we have: - -```cpp -namespace memory { -template <typename Place> void* Alloc(Place, size_t); -template <typename Place> void Free(Place, void*); -template <typename Place> size_t Used(Place); -} // namespace memory -``` - -These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: - -```cpp -template<> -void* Alloc<CPUPlace>(CPUPlace p, size_t size) { - return GetCPUBuddyAllocator()->Alloc(size); -} -``` - -and - -```cpp -template<> -void Alloc<GPUPlace>(GPUPlace p, size_t size) { - return GetGPUBuddyAllocator(p.id)->Alloc(size); -} -``` - -Similar specializations exist for `Free` and `Used`. - -### Implementation - -`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletions. 
- -```cpp -BuddyAllocator* GetCPUBuddyAllocator() { - static BuddyAllocator* a = NULL; - if (a == NULL) { - a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...); - } - return a; -} - -BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static BuddyAllocator* as = NULL; - if (as == NULL) { - as = new BuddyAllocator*[platform::NumGPUs()]; - for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) { - as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...); - } - } - return as[gpu_id); -``` - -#### `BuddyAllocator` - -`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes parameters only related with the algorithm: - -```cpp -BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) { - ... -} -``` - -Please be aware that **`BuddyAllocator` always allocate aligned memory**, aligned on 32-bytes, which can hold a `BuddyAllocator::Block` object: - -```cpp -class BuddyAllocator { - private: - struct Block { - size_t size; - Block* left, right; - size_t index; // allocator id - }; - ... -}; -``` - -Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace. - -#### System Allocators - -The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. - -## Justification - -I got inspiration from Majel and Caffe2, though above design look different from both. - -### Caffe2 - -In Caffe2, `Tensor<Context>::mutable_data()` allocates the memroy. In particular, [`Tensor<Context>::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor<Context>::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479). - -There are two implementations of `Context`: - -1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. - -1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. - -### Majel - -In Majel, there are basically two allocator types: - -1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`. -1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`. - -However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces. - -In Majel there are hidden global variables like: - -1. `cpu::SystemAllocator g_cpu_allocator`, and -1. `vector<gpu::SystemAllocator*> g_gpu_allocators(NUM_GPUS)`. 
- -Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`. +Please check out the [design documentation](http://gangliao.me) to find out more details about +the buddy memory allocator for both CPU and GPU. diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 3ac359e1746df2bcd6d285793b0d4a677f282cb7..7ef7a73bc8b25e6a637a5e89c87e3eef06174b92 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -38,5 +38,17 @@ void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num, cudaStream_t stream); #endif // PADDLE_ONLY_CPU +template <typename T, /* must be POD types */ + typename Place /* platform::GPUPlace or platform::CPUPlace */, + typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> +class PODDeleter { + public: + PODDeleter(Place place) : place_(place) {} + void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); } + + private: + Place place_; +}; + } // namespace memory } // namespace paddle diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a37720e5093342f5e02bd9a15a3099de434d6396..0a14dc21144153f9a45d5227e54102983c6c2659 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -48,6 +48,7 @@ op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) +op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op softmax_op net) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 41d044cdb72b5fb2a7f8654e8ad103778e0857d1..ebe9ceebe488437866fd6097531623eeb547f67a 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -31,7 +31,7 @@ protected: "Inputs/Outputs of AddOp must all be set"); PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), "Two input of Add Op's dimension must be same."); - outputs[0]->set_dims(inputs[0]->dims()); + outputs[0]->Resize(inputs[0]->dims()); } }; @@ -53,6 +53,5 @@ The equation is: Out = X + Y } // namespace paddle REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); -typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float> - AddKernel_CPU_float; -REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float); +REGISTER_OP_CPU_KERNEL( + add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 0edf142ee4e5f359ea14be02dbf3f7f8855f6db1..2e5a755f92e4d1fa487152ed453fe3b2823062ed 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,6 +1,5 @@ #include "paddle/operators/add_op.h" #include "paddle/framework/op_registry.h" -typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float; REGISTER_OP_GPU_KERNEL(add_two, - AddKernel_GPU_float); \ No newline at end of file + paddle::operators::AddKernel<paddle::platform::GPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7d7bb09f3d63bef49913c3c7501082c509c45653 ---
/dev/null +++ b/paddle/operators/cross_entropy_op.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/cross_entropy_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace operators { + +class OnehotCrossEntropyOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector<const framework::Tensor *> &inputs, + const std::vector<framework::Tensor *> &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 2, + "Input size of OnehotCrossEntropyOp must be two"); + PADDLE_ENFORCE(outputs.size() == 1, + "Output size of OnehotCrossEntropyOp must be one"); + PADDLE_ENFORCE(inputs[0] != nullptr && inputs[1] != nullptr, + "Inputs of OnehotCrossEntropyOp must all be set"); + PADDLE_ENFORCE(outputs[0] != nullptr, + "Outputs of OnehotCrossEntropyOp must all be set"); + PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2."); + PADDLE_ENFORCE(inputs[1]->dims().size() == 1, + "label's dimension must be 1."); + outputs[0]->Resize(framework::make_ddim({inputs[0]->dims()[0]})); + } +}; + +class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { +public: + OnehotCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of OnehotCrossEntropyOp"); + AddInput("label", "The second input of OnehotCrossEntropyOp"); + AddOutput("Y", "The output of OnehotCrossEntropyOp"); + AddComment(R"DOC( +OnehotCrossEntropy Operator. + + Y[i] = -log(X[i][j]) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(onehot_cross_entropy, + paddle::operators::OnehotCrossEntropyOp, + paddle::operators::OnehotCrossEntropyOpMaker); +REGISTER_OP_CPU_KERNEL( + onehot_cross_entropy, + paddle::operators::OnehotCrossEntropyOpKernel<::paddle::platform::CPUPlace, + float>); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..1bcdcb7ea650a361cad376ecdd5e96fe8e8f7c94 --- /dev/null +++ b/paddle/operators/cross_entropy_op.cu @@ -0,0 +1,6 @@ +#include "paddle/operators/cross_entropy_op.h" +#include "paddle/framework/op_registry.h" + +REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, + paddle::operators::OnehotCrossEntropyOpKernel< + ::paddle::platform::GPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ad2c7f34e1fd91b97287b4c5f4004d5b79ea4f82 --- /dev/null +++ b/paddle/operators/cross_entropy_op.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "glog/logging.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template <typename Place, typename T> +class OnehotCrossEntropyOpKernel : public framework::OpKernel { +public: + constexpr T LOG_THRESHOLD() const { return static_cast<T>(1e-20); } + + void Compute(const framework::KernelContext& context) const override { + auto X = context.Input(0)->Get<framework::Tensor>(); + const T* X_data = X.data<T>(); + const int* label_data = + context.Input(1)->Get<framework::Tensor>().data<int>(); + auto* Y = context.Output(0)->GetMutable<framework::Tensor>(); + + Y->mutable_data<T>(context.GetPlace()); + + T* Y_data = Y->data<T>(); + + int batch_size = X.dims()[0]; + int class_num = X.dims()[1]; + + // Y[i] = -log(X[i][j]) + for (int i = 0; i < batch_size; ++i) { + Y_data[i] = -std::log( + std::max(X_data[i * class_num + label_data[i]], LOG_THRESHOLD())); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 713b2a5dc83d8dd5a3d944101591d75cb19fe04f..079a5800804345762b0b4bc7b8bc9ca042856ccc 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -12,9 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include <paddle/framework/op_registry.h> -#include <paddle/framework/tensor.h> -#include <paddle/operators/mul_op.h> +#include "paddle/operators/mul_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" namespace paddle { namespace operators { @@ -33,7 +33,7 @@ protected: dim0[1] == dim1[0], "First matrix's width must be equal with second matrix's height."); PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output"); - outputs[0]->set_dims({dim0[0], dim1[1]}); + outputs[0]->Resize({dim0[0], dim1[1]}); } }; @@ -57,4 +57,4 @@ The equation is: Out = X * Y REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker); REGISTER_OP_CPU_KERNEL( - mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>); + mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 201723df247993c5cc1650edbe4f74441e3217d4..3ee581dc77dc08e6e47b240588811fbc7c6ea303 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -12,9 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include <paddle/operators/mul_op.h> -#include <paddle/framework/op_registry.h> +#include "paddle/operators/mul_op.h" +#include "paddle/framework/op_registry.h" REGISTER_OP_GPU_KERNEL(mul, paddle::operators::MulKernel<paddle::platform - ::GPUPlace>); \ No newline at end of file + ::GPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index ce8a0169e0cbaafb7e90d2227c9597fff463883d..e6bad7fb9da2d489666aa67f032552e48a86c6cb 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -14,17 +14,30 @@ #pragma once -#include <glog/logging.h> -#include <paddle/framework/operator.h> +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" namespace paddle { namespace operators { -template <typename Place> +template <typename Place, typename T> class MulKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext &context) const override { - LOG(INFO) << "Mul kernel in " << typeid(Place).name(); + void Compute(const framework::KernelContext& context) const override { + Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = { + {Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}}; + + auto input0 = context.Input(0)->Get<framework::Tensor>(); + auto input1 = context.Input(1)->Get<framework::Tensor>(); + auto* output = context.Output(0)->GetMutable<framework::Tensor>(); + + output->mutable_data<T>(context.GetPlace()); + + framework::EigenMatrix<T>::From(*output).device( + *(context.GetEigenDevice<Place>())) = + framework::EigenMatrix<T>::From(input0).contract( + framework::EigenMatrix<T>::From(input1), dim_pair); } }; } // namespace operators diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 414bafd0468033813d50d4d6723e68ee9347eaac..e04d69fa72a2f54cc1cc0829d12e0da1609b3383 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -12,8 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include <paddle/framework/op_registry.h> -#include <paddle/operators/rowwise_add_op.h> +#include "paddle/operators/rowwise_add_op.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { @@ -30,7 +30,7 @@ protected: PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1"); - outputs[0]->set_dims(inputs[0]->dims()); + outputs[0]->Resize(inputs[0]->dims()); } }; @@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add, paddle::operators::RowWiseAddOpMaker); REGISTER_OP_CPU_KERNEL( rowwise_add, - paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>); + paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index 2c4bfbf93a1064a47a19c991fa6655b5d67e83cb..5dfac4fd2cf9b7da24dcfa5e7583b9ece12bad1e 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -1,6 +1,6 @@ -#include <paddle/framework/op_registry.h> -#include <paddle/operators/rowwise_add_op.h> +#include "paddle/framework/op_registry.h" +#include "paddle/operators/rowwise_add_op.h" REGISTER_OP_GPU_KERNEL( rowwise_add, - paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>); + paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 35f43e6376be6239021e7a9bacb849b93d5226b5..dc47fe7c847bd0c8c179ac0a5f44b8cc541b47cb 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -13,17 +13,32 @@ limitations under the License. */ #pragma once -#include <glog/logging.h> -#include <paddle/framework/operator.h> +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" namespace paddle { namespace operators { -template <typename Place> +template <typename Place, typename T> class RowWiseAddKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext &context) const override { - LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name(); + void Compute(const framework::KernelContext& context) const override { + auto in0 = context.Input(0)->Get<framework::Tensor>(); + auto in1 = context.Input(1)->Get<framework::Tensor>(); + auto* out = context.Output(0)->GetMutable<framework::Tensor>(); + out->mutable_data<T>(context.GetPlace()); + + auto input = framework::EigenMatrix<T>::From(in0); + auto bias = framework::EigenVector<T>::From(in1); + auto output = framework::EigenMatrix<T>::From(*out); + + const int bias_size = bias.dimension(0); + const int rest_size = input.size() / bias_size; + Eigen::DSizes<int, 1> one_d(input.size()); + Eigen::DSizes<int, 1> bcast(rest_size); + output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) = + input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d); } }; diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 04df87a3add2af7daa127a072f7b690f6cf94327..66ab1e001142bfb005d3c2e2ea29e01a32dce507 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -31,7 +31,7 @@ protected: PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] mast be set"); PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), "Two input of SGD Op's dimension must be same."); - outputs[0]->set_dims(inputs[0]->dims()); + outputs[0]->Resize(inputs[0]->dims()); } }; 
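The reshape/broadcast arithmetic in `RowWiseAddKernel` above is easier to follow in isolation. The following is a minimal standalone sketch of the same pattern, not part of the patch itself; it assumes only Eigen's unsupported Tensor module and uses row-major tensors to match the layout that `framework::EigenMatrix::From` exposes, with illustrative shapes:

```cpp
// Standalone sketch of the reshape/broadcast pattern used by RowWiseAddKernel.
#include <unsupported/Eigen/CXX11/Tensor>

#include <iostream>

int main() {
  const int rows = 2, cols = 3;
  Eigen::Tensor<float, 2, Eigen::RowMajor> input(rows, cols);
  input.setValues({{0, 1, 2}, {3, 4, 5}});
  Eigen::Tensor<float, 1, Eigen::RowMajor> bias(cols);
  bias.setValues({10, 20, 30});

  // Flatten the matrix to 1-D, tile the bias once per row, then add:
  // the same reshape(one_d) / broadcast(bcast) steps as in the kernel.
  Eigen::DSizes<Eigen::DenseIndex, 1> one_d(rows * cols);
  Eigen::DSizes<Eigen::DenseIndex, 1> bcast(rows);
  Eigen::Tensor<float, 1, Eigen::RowMajor> out =
      input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);

  for (int i = 0; i < rows * cols; ++i) std::cout << out(i) << " ";
  std::cout << "\n";  // prints: 10 21 32 13 24 35
  return 0;
}
```

The 1-D reshape lets a single elementwise add apply the bias to every row; `bcast(rows)` tiles the bias once per row of the flattened input, so no full-size bias matrix is ever materialized.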
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 45ae277c538ca90716febaf2f3d92b560149d147..91f7d86aebae2e67b2fc18bf2c558fbe2e03de92 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -12,8 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include <paddle/framework/op_registry.h> -#include <paddle/operators/sigmoid_op.h> +#include "paddle/operators/sigmoid_op.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { @@ -24,7 +24,7 @@ protected: const std::vector<framework::Tensor *> &outputs) const override { PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output"); - outputs[0]->set_dims(inputs[0]->dims()); + outputs[0]->Resize(inputs[0]->dims()); } }; @@ -34,7 +34,7 @@ public: framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "sigmoid input"); - AddInput("Y", "sigmoid output"); + AddOutput("Y", "sigmoid output"); AddComment("Sigmoid function"); } }; @@ -46,4 +46,5 @@ REGISTER_OP(sigmoid, paddle::operators::SigmoidOp, paddle::operators::SigmoidOpMaker); REGISTER_OP_CPU_KERNEL( - sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>); + sigmoid, + paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index 79d5222348f610b1b016a2df06e8b1e0a4fac66c..ed344b2bfd4a9eeef2ce79746bec608469503c9c 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -1,5 +1,5 @@ -#include <paddle/operators/sigmoid_op.h> -#include <paddle/framework/op_registry.h> +#include "paddle/operators/sigmoid_op.h" +#include "paddle/framework/op_registry.h" REGISTER_OP_GPU_KERNEL( - sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>); + sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 42173343f3e364729ecd190fc554b8c45ecfca8d..2b9356246c471853b53af1d73f8b2a3c206db7ad 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -14,17 +14,25 @@ #pragma once -#include <glog/logging.h> -#include <paddle/framework/operator.h> +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" namespace paddle { namespace operators { -template <typename Place> +template <typename Place, typename T> class SigmoidKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext &context) const override { - LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name(); + void Compute(const framework::KernelContext& context) const override { + auto input = context.Input(0)->Get<framework::Tensor>(); + auto* output = context.Output(0)->GetMutable<framework::Tensor>(); + + output->mutable_data<T>(context.GetPlace()); + + framework::EigenVector<T>::Flatten(*output).device( + *(context.GetEigenDevice<Place>())) = + 1.0 / (1.0 + (-1.0 * framework::EigenVector<T>::Flatten(input)).exp()); } }; } // namespace operators diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 4ca7be359e210d7a31aef94e498f37a1ad4879a2..cf5e273de6be71e727f27d5e87d13d9235e31d0c 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -11,8 +11,8 @@ WITHOUT WARRANTIES OR 
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 4ca7be359e210d7a31aef94e498f37a1ad4879a2..cf5e273de6be71e727f27d5e87d13d9235e31d0c 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -11,8 +11,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <paddle/framework/op_registry.h>
-#include <paddle/operators/softmax_op.h>
+#include "paddle/operators/softmax_op.h"
+#include "paddle/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {
@@ -23,9 +23,11 @@ protected:
                const std::vector<const framework::Tensor *> &inputs,
                const std::vector<framework::Tensor *> &outputs) const override {
     PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be a matrix");
     PADDLE_ENFORCE(outputs.size() == 1,
                    "Only one output is need for softmax");
-    outputs[0]->set_dims(inputs[0]->dims());
+    outputs[0]->Resize(inputs[0]->dims());
   }
 };
 
@@ -46,4 +48,5 @@ public:
 namespace ops = paddle::operators;
 
 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu
index 903eef1b62231d65e2f9ec7a1f57fca0f4c4605c..60676191eb9460868a266d0e4f70357fa78bec2c 100644
--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@@ -1,5 +1,5 @@
-#include <paddle/framework/op_registry.h>
-#include <paddle/operators/softmax_op.h>
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/softmax_op.h"
 
 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
index 74e9e2786b11b9a87cd9700d8458d4e611a8d4bb..500c188dbfcf28ae52c2d5b06466539e115acc4a 100644
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -14,17 +14,49 @@
 
 #pragma once
 
-#include <glog/logging.h>
-#include <paddle/framework/operator.h>
+#include "glog/logging.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/operator.h"
 
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+    output->mutable_data<T>(context.GetPlace());
+
+    auto logits = framework::EigenMatrix<T>::From(input);
+    auto softmax = framework::EigenMatrix<T>::From(*output);
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits -
+                           logits.maximum(along_class)
+                               .eval()
+                               .reshape(batch_by_one)
+                               .broadcast(one_by_class));
+
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax *
+         softmax.sum(along_class)
+             .inverse()
+             .eval()
+             .reshape(batch_by_one)
+             .broadcast(one_by_class));
   }
 };
 
 }  // namespace operators
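
The SoftmaxKernel above subtracts the per-row maximum from the logits before exponentiating, so exp() never sees a large positive argument, and then normalizes each row. A NumPy sketch of the same two steps (not part of the patch):

    import numpy as np

    logits = np.random.random((32, 100)).astype("float32")
    shifted = logits - logits.max(axis=1, keepdims=True)  # guards exp() against overflow
    exps = np.exp(shifted)
    softmax = exps / exps.sum(axis=1, keepdims=True)      # each row now sums to 1
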
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index 6354dd211d5d036e1b5971babaf624e8f847a92b..fd1a142b40e19d257505f0465ce6c7a62e5cbc35 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -1,2 +1,2 @@
 cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
-    add_op fc_op sgd_op)
+    add_op fc_op sgd_op cross_entropy_op)
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 54707a2859693af4a80692bf5cebab59c43ffbc3..7a215881706ddc648f2753fe73e789f7fc460072 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -27,8 +27,13 @@ namespace py = pybind11;
 namespace pd = paddle::framework;
 
 USE_OP(add_two);
+USE_OP(onehot_cross_entropy);
 USE_OP_WITHOUT_KERNEL(fc);
 USE_OP(sgd);
+USE_OP(mul);
+USE_OP(sigmoid);
+USE_OP(softmax);
+USE_OP(rowwise_add);
 
 PYBIND11_PLUGIN(core) {
   py::module m("core", "C++ core of Paddle Paddle");
@@ -41,7 +46,7 @@ PYBIND11_PLUGIN(core) {
            [](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
       .def("set_dims",
            [](pd::Tensor& self, const std::vector<int>& dim) {
-             self.set_dims(pd::make_ddim(dim));
+             self.Resize(pd::make_ddim(dim));
            })
       .def("alloc_float",
            [](pd::Tensor& self) {
diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h
index b96516643ab55b9615ccafdc41d3290590987d95..995e102bf9d342e1604f5ae704288d6cf68d97a4 100644
--- a/paddle/pybind/tensor_bind.h
+++ b/paddle/pybind/tensor_bind.h
@@ -86,7 +86,7 @@ void PyTensorSetFromArray(
     dims.push_back((int)array.shape()[i]);
   }
 
-  self.set_dims(framework::make_ddim(dims));
+  self.Resize(framework::make_ddim(dims));
   auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace());
   std::memcpy(dst, array.data(), sizeof(T) * array.size());
 }
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index ab81e67579e39a34e3ace18d14434eb86b66fa5b..fc112f1327f5ad5f1bdd04873394b1fa0e761e29 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3219,6 +3219,10 @@ def ParameterHook(type, **kwargs):
         if sparsity_ratio is not None:
             hook.sparsity_ratio = sparsity_ratio
         return hook
+    elif type == 'dpruning':
+        hook = ParameterUpdaterHookConfig()
+        hook.type = type
+        return hook
     else:
         return None
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index fdb6f83f2ba510232714fb8a9c7c1af837a753ff..21eba71527e60833e0c69b344ecc639626faa529 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3173,11 +3173,11 @@ def memory(name,
 
 @wrap_bias_attr_default()
-@wrap_act_default(
-    param_names=['gate_act', 'state_act'], act=SigmoidActivation())
+@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
+@wrap_act_default(param_names=['state_act'], act=TanhActivation())
 @wrap_act_default(act=TanhActivation())
 @wrap_name_default('lstm_step')
-@layer_support()
+@layer_support(ERROR_CLIPPING, DROPOUT)
 def lstm_step_layer(input,
                     state,
                     size=None,
@@ -3531,12 +3531,7 @@ def SubsequenceInput(input):
 
 @wrap_name_default("recurrent_group")
-def recurrent_group(step,
-                    input,
-                    reverse=False,
-                    name=None,
-                    targetInlink=None,
-                    is_generating=False):
+def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
     """
     Recurrent layer group is an extremely flexible recurrent unit in
     PaddlePaddle. As long as the user defines the calculation done within a
@@ -3602,21 +3597,12 @@ def recurrent_group(step,
     :type targetInlink: LayerOutput|SubsequenceInput
 
-    :param is_generating: If is generating, none of input type should be LayerOutput;
-                          else, for training or testing, one of the input type must
-                          be LayerOutput.
-
-    :type is_generating: bool
-
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     model_type('recurrent_nn')
 
-    def is_single_input(x):
-        return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
-
-    if is_single_input(input):
+    if isinstance(input, LayerOutput) or isinstance(input, StaticInput):
         input = [input]
     assert isinstance(input, collections.Sequence)
 
@@ -3630,13 +3616,8 @@ def recurrent_group(step,
         in_links=map(lambda x: x.name, in_links),
         seq_reversed=reverse)
 
     in_args = []
-    has_LayerOutput = False
     for each_input in input:
-        assert is_single_input(each_input)
-        if isinstance(each_input, LayerOutput):
-            in_args.append(each_input)
-            has_LayerOutput = True
-        else:  # StaticInput
+        if isinstance(each_input, StaticInput):  # StaticInput
             mem_name = "__%s_memory__" % each_input.input.name
             mem = memory(
                 name=None,
@@ -3644,24 +3625,26 @@ def recurrent_group(step,
                 boot_layer=each_input.input)
             mem.set_input(mem)
             in_args.append(mem)
-
-    assert (is_generating != has_LayerOutput)
+        else:
+            in_args.append(each_input)
 
     layer_outs = step(*in_args)
 
     if isinstance(layer_outs, LayerOutput):
         layer_outs = [layer_outs]
 
-    for ot in layer_outs:
-        assert isinstance(ot, LayerOutput)
-        ot.reverse = reverse
-        RecurrentLayerGroupSetOutLink(ot.name)
+    for layer_out in layer_outs:
+        assert isinstance(
+            layer_out, LayerOutput
+        ), "Type of step function's return value must be LayerOutput."
+        layer_out.reverse = reverse
+        RecurrentLayerGroupSetOutLink(layer_out.name)
 
     RecurrentLayerGroupEnd(name=name)
 
     for layer_out in layer_outs:
-        # Thee previous full_name is the name is the rnn group
-        # We need a full_name outside the rnn group
+        # The previous full_name is the name inside the recurrent group.
+        # We need a full_name outside the recurrent group.
        layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
 
     if len(layer_outs) == 1:
@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object):
 
 class GeneratedInput(BaseGeneratedInput):
     def after_real_step(self, input):
-        return maxid_layer(input=input, name='__beam_search_predict__')
+        if isinstance(input, LayerOutput):
+            input = [input]
+        elif isinstance(input, collections.Sequence):
+            input = list(input)
+            if len(input) > 1:
+                logger.info(
+                    ("More than one layer inside the recurrent_group "
+                     "is returned as output of the entire recurrent_group. "
+                     "Please guarantee that the first output is the "
+                     "probability of the predicted next word."))
+
+        return [maxid_layer(
+            input=input[0], name='__beam_search_predict__')] + (
+                input[1:] if len(input) > 1 else [])
 
     def before_real_step(self):
         predict_id = memory(
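
The reworked after_real_step above accepts either a single LayerOutput or a sequence of them from the step function and always places the prediction layer first. A standalone sketch of that normalization, with hypothetical names standing in for maxid_layer:

    def normalize_step_outputs(outputs):
        # Accept a single output or a sequence of outputs.
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        # Stand-in for maxid_layer(input=outputs[0], ...); extra outputs ride along.
        return [("maxid_of", outputs[0])] + list(outputs[1:])

    print(normalize_step_outputs("prob"))           # [('maxid_of', 'prob')]
    print(normalize_step_outputs(["prob", "attn"])) # [('maxid_of', 'prob'), 'attn']
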
@@ -3871,6 +3867,7 @@ def beam_search(step,
     :type step: callable
     :param input: Input data for the recurrent unit, which should include the
                   previously generated words as a GeneratedInput object.
+                  In beam_search, none of the inputs should be a LayerOutput.
     :type input: list
     :param bos_id: Index of the start symbol in the dictionary.
                    The start symbol is a special token for NLP task, which indicates the
@@ -3912,15 +3909,18 @@ def beam_search(step,
 
     real_input = []
     for i, each_input in enumerate(input):
-        assert isinstance(each_input, StaticInput) or isinstance(
-            each_input, BaseGeneratedInput)
+        assert not isinstance(each_input, LayerOutput), (
+            "in beam_search, "
+            "none of the inputs should be a LayerOutput.")
         if isinstance(each_input, BaseGeneratedInput):
-            assert generated_input_index == -1
+            assert generated_input_index == -1, ("recurrent_group accepts "
+                                                 "only one GeneratedInput.")
             generated_input_index = i
+
         else:
             real_input.append(each_input)
 
-    assert generated_input_index != -1
+    assert generated_input_index != -1, "No GeneratedInput is given."
 
     gipt = input[generated_input_index]
 
@@ -3941,17 +3941,11 @@ def beam_search(step,
 
         predict = gipt.after_real_step(step(*args))
 
-        eos_layer(input=predict, eos_id=eos_id, name=eos_name)
+        eos_layer(input=predict[0], eos_id=eos_id, name=eos_name)
 
         return predict
 
-    tmp = recurrent_group(
-        step=__real_step__,
-        input=real_input,
-        reverse=False,
-        name=name,
-        is_generating=True)
-
-    return tmp
+    return recurrent_group(
+        step=__real_step__, input=real_input, reverse=False, name=name)
 
 
 def __cost_input__(input, label, weight=None):
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index 810bea913ec79b2df0eb63ed5a4fd411549ff2e9..dcc4fec4f3313f2ad10073dcecbc015be4021abd 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -614,18 +614,17 @@ def simple_lstm(input,
 
 @wrap_name_default('lstm_unit')
 def lstmemory_unit(input,
-                   memory_boot=None,
+                   out_memory=None,
                    name=None,
                    size=None,
                    param_attr=None,
                    act=None,
                    gate_act=None,
                    state_act=None,
-                   mixed_bias_attr=None,
+                   input_proj_bias_attr=None,
+                   input_proj_layer_attr=None,
                    lstm_bias_attr=None,
-                   mixed_layer_attr=None,
-                   lstm_layer_attr=None,
-                   get_output_layer_attr=None):
+                   lstm_layer_attr=None):
     """
     Define calculations that a LSTM unit performs during a single time step.
     This function itself is not a recurrent layer, so it can not be
@@ -662,8 +661,8 @@ def lstmemory_unit(input,
 
     :param input: input layer name.
     :type input: LayerOutput
-    :param memory_boot: the initialization state of the LSTM cell.
-    :type memory_boot: LayerOutput | None
+    :param out_memory: output of previous time step
+    :type out_memory: LayerOutput | None
     :param name: lstmemory unit name.
     :type name: basestring
     :param size: lstmemory unit size.
@@ -676,33 +675,35 @@ def lstmemory_unit(input,
     :type gate_act: BaseActivation
     :param state_act: lstm state activiation type.
     :type state_act: BaseActivation
-    :param mixed_bias_attr: bias parameter attribute of mixed layer.
-                            False means no bias, None means default bias.
-    :type mixed_bias_attr: ParameterAttribute|False
+    :param input_proj_bias_attr: bias attribute for input to hidden projection.
+                                 False means no bias, None means default bias.
+    :type input_proj_bias_attr: ParameterAttribute|False|None
+    :param input_proj_layer_attr: extra layer attribute for input to hidden
+                                  projection of the LSTM unit, such as dropout, error clipping.
+    :type input_proj_layer_attr: ExtraLayerAttribute
     :param lstm_bias_attr: bias parameter attribute of lstm layer.
-                      False means no bias, None means default bias.
+                           False means no bias, None means default bias.
     :type lstm_bias_attr: ParameterAttribute|False
-    :param mixed_layer_attr: mixed layer's extra attribute.
-    :type mixed_layer_attr: ExtraLayerAttribute
     :param lstm_layer_attr: lstm layer's extra attribute.
     :type lstm_layer_attr: ExtraLayerAttribute
-    :param get_output_layer_attr: get output layer's extra attribute.
-    :type get_output_layer_attr: ExtraLayerAttribute
     :return: lstmemory unit name.
     :rtype: LayerOutput
     """
     if size is None:
         assert input.size % 4 == 0
         size = input.size / 4
-    out_mem = memory(name=name, size=size)
-    state_mem = memory(
-        name="%s_state" % name, size=size, boot_layer=memory_boot)
+    if out_memory is None:
+        out_mem = memory(name=name, size=size)
+    else:
+        out_mem = out_memory
+
+    state_mem = memory(name="%s_state" % name, size=size)
 
     with mixed_layer(
             name="%s_input_recurrent" % name,
             size=size * 4,
-            bias_attr=mixed_bias_attr,
-            layer_attr=mixed_layer_attr,
+            bias_attr=input_proj_bias_attr,
+            layer_attr=input_proj_layer_attr,
             act=IdentityActivation()) as m:
         m += identity_projection(input=input)
         m += full_matrix_projection(input=out_mem, param_attr=param_attr)
@@ -717,11 +718,7 @@ def lstmemory_unit(input,
         gate_act=gate_act,
         state_act=state_act,
         layer_attr=lstm_layer_attr)
-    get_output_layer(
-        name='%s_state' % name,
-        input=lstm_out,
-        arg_name='state',
-        layer_attr=get_output_layer_attr)
+    get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
 
     return lstm_out
 
@@ -730,17 +727,16 @@ def lstmemory_unit(input,
 def lstmemory_group(input,
                     size=None,
                     name=None,
-                    memory_boot=None,
+                    out_memory=None,
                     reverse=False,
                     param_attr=None,
                     act=None,
                     gate_act=None,
                     state_act=None,
-                    mixed_bias_attr=None,
+                    input_proj_bias_attr=None,
+                    input_proj_layer_attr=None,
                     lstm_bias_attr=None,
-                    mixed_layer_attr=None,
-                    lstm_layer_attr=None,
-                    get_output_layer_attr=None):
+                    lstm_layer_attr=None):
     """
     lstm_group is a recurrent_group version of Long Short Term Memory. It
     does exactly the same calculation as the lstmemory layer (see lstmemory in
@@ -774,8 +770,8 @@ def lstmemory_group(input,
     :type size: int
     :param name: name of the lstmemory group.
     :type name: basestring
-    :param memory_boot: the initialization state of LSTM cell.
-    :type memory_boot: LayerOutput | None
+    :param out_memory: output of previous time step
+    :type out_memory: LayerOutput | None
     :param reverse: is lstm reversed
     :type reverse: bool
     :param param_attr: Parameter config, None if use default.
@@ -786,18 +782,17 @@ def lstmemory_group(input,
     :type gate_act: BaseActivation
     :param state_act: lstm state activiation type.
     :type state_act: BaseActivation
-    :param mixed_bias_attr: bias parameter attribute of mixed layer.
-                            False means no bias, None means default bias.
-    :type mixed_bias_attr: ParameterAttribute|False
     :param lstm_bias_attr: bias parameter attribute of lstm layer.
                            False means no bias, None means default bias.
     :type lstm_bias_attr: ParameterAttribute|False
-    :param mixed_layer_attr: mixed layer's extra attribute.
-    :type mixed_layer_attr: ExtraLayerAttribute
+    :param input_proj_bias_attr: bias attribute for input to hidden projection.
+                                 False means no bias, None means default bias.
+    :type input_proj_bias_attr: ParameterAttribute|False|None
+    :param input_proj_layer_attr: extra layer attribute for input to hidden
+                                  projection of the LSTM unit, such as dropout, error clipping.
+    :type input_proj_layer_attr: ExtraLayerAttribute
     :param lstm_layer_attr: lstm layer's extra attribute.
     :type lstm_layer_attr: ExtraLayerAttribute
-    :param get_output_layer_attr: get output layer's extra attribute.
-    :type get_output_layer_attr: ExtraLayerAttribute
     :return: the lstmemory group.
     :rtype: LayerOutput
     """
 
@@ -805,18 +800,17 @@ def lstmemory_group(input,
     def __lstm_step__(ipt):
         return lstmemory_unit(
             input=ipt,
-            memory_boot=memory_boot,
             name=name,
             size=size,
-            mixed_bias_attr=mixed_bias_attr,
-            mixed_layer_attr=mixed_layer_attr,
-            param_attr=param_attr,
-            lstm_bias_attr=lstm_bias_attr,
             act=act,
             gate_act=gate_act,
             state_act=state_act,
+            out_memory=out_memory,
+            input_proj_bias_attr=input_proj_bias_attr,
+            input_proj_layer_attr=input_proj_layer_attr,
+            param_attr=param_attr,
             lstm_layer_attr=lstm_layer_attr,
-            get_output_layer_attr=get_output_layer_attr)
+            lstm_bias_attr=lstm_bias_attr)
 
     return recurrent_group(
         name='%s_recurrent_group' % name,
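
For callers, the visible change in lstmemory_unit/lstmemory_group is the renamed arguments: memory_boot becomes out_memory, and the mixed_* attributes become input_proj_*. A hedged config sketch using only the documented parameters (see the real usage in shared_lstm.py below):

    lstm = lstmemory_group(
        input=m1,                          # some projected input sequence
        size=100,
        param_attr=ParamAttr(name='lstm_param'),
        lstm_bias_attr=ParamAttr(name='lstm_bias'),
        input_proj_bias_attr=False)        # was mixed_bias_attr=False
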
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 7f2aa5a0fea1f4628e4effca5ce9af896f6e6c2c..75cf2312032e187dafc66199e933d3ad0fa33050 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -104,7 +104,7 @@ layers {
   }
   bias_parameter_name: "lstm_bias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
@@ -183,7 +183,7 @@ layers {
   }
   bias_parameter_name: "lstm_bias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index af1b63c5dfbf0984a20eda02d608f76a454613c6..711785be37dbe7f2decc161d1b8e1ead62927b20 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -258,7 +258,7 @@ layers {
   }
   bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
index 05810597b3154c3b287441465db16ee6e24b0ca2..565e281a6e1deff18aa48f97eb2f0e39ca79752f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
     input=m1,
     param_attr=lstm_param,
     lstm_bias_attr=lstm_bias,
-    mixed_bias_attr=False)
+    input_proj_bias_attr=False)
+
 lstm2 = lstmemory_group(
     input=m2,
     param_attr=lstm_param,
     lstm_bias_attr=lstm_bias,
-    mixed_bias_attr=False)
+    input_proj_bias_attr=False)
 
 softmax_param = ParamAttr(name='softmax_param')
diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py
index e2a21e6e3e04e79fdfc225ce1b4496b6b69d1e89..634388094c804827657dc83d5c205e680625b156 100644
--- a/python/paddle/v2/dataset/flowers.py
+++ b/python/paddle/v2/dataset/flowers.py
@@ -116,7 +116,7 @@ def reader_creator(data_file,
             data = batch['data']
             labels = batch['label']
             for sample, label in itertools.izip(data, batch['label']):
-                yield sample, int(label)
+                yield sample, int(label) - 1
 
     if use_xmap:
         return xmap_readers(mapper, reader, cpu_count(), buffered_size)
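
The flowers.py change above shifts the dataset's labels from 1-based to 0-based, which is what classification cost layers expect: assuming the annotations number the flower classes 1..102, a raw label of 1 now yields class index 0 and a raw label of 102 yields 101.
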
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index ec076e40c9312fee7f3ba030dc69208069fd45a8..aa67792ebc2100aabc213a7fc525c00e7b593c59 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -1,3 +1,14 @@
-add_python_test(test_framework test_protobuf.py test_scope.py
-    test_default_scope_funcs.py test_op_creation_methods.py
-    test_tensor.py test_fc_op.py test_add_two_op.py test_sgd_op.py)
+add_python_test(test_framework
+    test_protobuf.py
+    test_scope.py
+    test_default_scope_funcs.py
+    test_op_creation_methods.py
+    test_tensor.py
+    test_fc_op.py
+    test_add_two_op.py
+    test_sgd_op.py
+    test_cross_entropy_op.py
+    test_mul_op.py
+    test_sigmoid_op.py
+    test_softmax_op.py
+    test_rowwise_add_op.py)
diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py
index 237f9b7eb0d525a2c8431523a2d90b7e32493d53..7b62313f8aca5e9f515d1a9e6df3bb6f51b974fb 100644
--- a/python/paddle/v2/framework/tests/op_test_util.py
+++ b/python/paddle/v2/framework/tests/op_test_util.py
@@ -5,6 +5,18 @@ import paddle.v2.framework.create_op_creation_methods as creation
 
 
 class OpTestMeta(type):
+    """
+    Operator Test ClassMeta.
+
+    It injects a `test_all` method into the user's operator test class so
+    that the Python unittest module will run it.
+
+    `test_all` reads the values stored in `self`, uses them to create and
+    run an operator, and then checks whether that op's outputs are correct.
+
+    See `test_add_two_op` for example usage.
+    """
+
     def __new__(cls, name, bases, attrs):
         obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
 
@@ -44,7 +56,10 @@ class OpTestMeta(type):
             for out_name in func.all_output_args:
                 actual = numpy.array(scope.get_var(out_name).get_tensor())
                 expect = getattr(self, out_name)
-                numpy.testing.assert_almost_equal(actual, expect)
+                # TODO(qijun) The default decimal is 7, but numpy.dot and
+                # Eigen's mul show small differences that fail the unit test
+                # at that precision, so the decimal is set to 3 here. This
+                # needs further investigation.
+                numpy.testing.assert_almost_equal(actual, expect, decimal=3)
 
         obj.test_all = test_all
         return obj
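
The OpTestMeta docstring above describes a metaclass-injection pattern; here is a minimal self-contained sketch of the idea, with hypothetical names and none of the Paddle plumbing:

    import unittest

    class InjectTestMeta(type):
        def __new__(cls, name, bases, attrs):
            obj = super(InjectTestMeta, cls).__new__(cls, name, bases, attrs)

            def test_all(self):
                # The real OpTestMeta creates and runs the operator here;
                # this sketch only checks the attributes prepared in setUp().
                self.assertTrue(hasattr(self, 'type'))

            # unittest discovers the injected method like any other test_*.
            obj.test_all = test_all
            return obj

    class TestSomething(unittest.TestCase):
        __metaclass__ = InjectTestMeta  # Python 2 syntax, as in the tests below

        def setUp(self):
            self.type = 'add_two'
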
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..609c56535ef0365dda728cba334d8b4d96312192
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -0,0 +1,22 @@
+import unittest
+import numpy
+from op_test_util import OpTestMeta
+
+
+class TestCrossEntropyOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "onehot_cross_entropy"
+        batch_size = 100
+        class_num = 10
+        self.X = numpy.random.random((batch_size, class_num)).astype("float32")
+        self.label = 5 * numpy.ones(batch_size).astype("int32")
+        Y = []
+        for i in range(0, batch_size):
+            Y.append(-numpy.log(self.X[i][self.label[i]]))
+        self.Y = numpy.array(Y).astype("float32")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_mul_op.py b/python/paddle/v2/framework/tests/test_mul_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a87e66cd03af1bf84be8ffe111e4a8c3a24d6dc
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_mul_op.py
@@ -0,0 +1,17 @@
+import unittest
+from op_test_util import OpTestMeta
+import numpy as np
+
+
+class TestMulOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "mul"
+        self.X = np.random.random((32, 784)).astype("float32")
+        self.Y = np.random.random((784, 100)).astype("float32")
+        self.Out = np.dot(self.X, self.Y)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_rowwise_add_op.py b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef1514983c03f822f84b85437d1cfe653b6a1a2e
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
@@ -0,0 +1,17 @@
+import unittest
+from op_test_util import OpTestMeta
+import numpy as np
+
+
+class TestRowwiseAddOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "rowwise_add"
+        self.X = np.random.random((32, 784)).astype("float32")
+        self.b = np.random.random(784).astype("float32")
+        self.Out = np.add(self.X, self.b)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..50044a122f1d66dd54a24f6cce76074a60ee2262
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py
@@ -0,0 +1,16 @@
+import unittest
+from op_test_util import OpTestMeta
+import numpy as np
+
+
+class TestSigmoidOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "sigmoid"
+        self.X = np.random.random((32, 100)).astype("float32")
+        self.Y = 1 / (1 + np.exp(-self.X))
+
+
+if __name__ == '__main__':
+    unittest.main()
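
The loop in test_cross_entropy_op.py above computes Y[i] = -log(X[i, label[i]]); an equivalent vectorized NumPy form, for comparison:

    import numpy

    X = numpy.random.random((100, 10)).astype("float32")
    label = 5 * numpy.ones(100).astype("int32")
    Y = -numpy.log(X[numpy.arange(100), label])  # one-hot cross entropy per row
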
diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..191b698c1cdec9b86b4ded6b1f743586867ca62f
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_softmax_op.py
@@ -0,0 +1,23 @@
+import unittest
+from op_test_util import OpTestMeta
+import numpy as np
+
+
+def stable_softmax(x):
+    """Compute the softmax of vector x in a numerically stable way."""
+    shiftx = x - np.max(x)
+    exps = np.exp(shiftx)
+    return exps / np.sum(exps)
+
+
+class TestSoftmaxOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "softmax"
+        self.X = np.random.random((32, 100)).astype("float32")
+        self.Y = np.apply_along_axis(stable_softmax, 1, self.X)
+
+
+if __name__ == '__main__':
+    unittest.main()
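
A quick demonstration (not part of the patch) of why stable_softmax shifts by the maximum: with large inputs the naive form overflows to nan, while the shifted form stays finite and exact:

    import numpy as np

    def stable_softmax(x):
        shiftx = x - np.max(x)
        exps = np.exp(shiftx)
        return exps / np.sum(exps)

    x = np.array([1000.0, 1001.0, 1002.0])
    naive = np.exp(x) / np.sum(np.exp(x))  # exp(1000) overflows -> [nan nan nan]
    print(stable_softmax(x))               # [0.09003057 0.24472847 0.66524096]
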