提交 6cae35b5 编写于 作者: Y Yi Wang

Fix conflicts

...@@ -36,6 +36,8 @@ include(simd) ...@@ -36,6 +36,8 @@ include(simd)
################################ Configurations ####################################### ################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF)
option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
...@@ -74,6 +76,10 @@ if(ANDROID) ...@@ -74,6 +76,10 @@ if(ANDROID)
"Disable PYTHON when cross-compiling for Android" FORCE) "Disable PYTHON when cross-compiling for Android" FORCE)
set(WITH_RDMA OFF CACHE STRING set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android" FORCE) "Disable RDMA when cross-compiling for Android" FORCE)
set(WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android" FORCE)
set(WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android" FORCE)
endif(ANDROID) endif(ANDROID)
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
...@@ -87,6 +93,7 @@ endif() ...@@ -87,6 +93,7 @@ endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package
include(external/zlib) # download, build, install zlib include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog include(external/glog) # download, build, install glog
...@@ -94,6 +101,7 @@ include(external/gtest) # download, build, install gtest ...@@ -94,6 +101,7 @@ include(external/gtest) # download, build, install gtest
include(external/protobuf) # download, build, install protobuf include(external/protobuf) # download, build, install protobuf
include(external/python) # download, build, install python include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn
include(external/swig) # download, build, install swig include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any include(external/any) # download libn::any
...@@ -135,6 +143,10 @@ if(WITH_GPU) ...@@ -135,6 +143,10 @@ if(WITH_GPU)
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
endif(WITH_GPU) endif(WITH_GPU)
if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB})
endif()
if(USE_NNPACK) if(USE_NNPACK)
include(external/nnpack) include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
......
...@@ -15,23 +15,44 @@ ...@@ -15,23 +15,44 @@
set(CBLAS_FOUND OFF) set(CBLAS_FOUND OFF)
## Find MKL First. ## Find MKLML First.
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") if(WITH_MKLML AND MKLML_INC_DIR AND MKLML_LIB)
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER MKLML)
set(CBLAS_INC_DIR ${MKLML_INC_DIR})
set(CBLAS_LIBRARIES ${MKLML_LIB})
add_definitions(-DPADDLE_USE_MKLML)
add_definitions(-DLAPACK_FOUND)
message(STATUS "Found cblas and lapack in MKLML "
"(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
return()
endif()
## Then find MKL.
set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs")
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL")
set(MKL_INCLUDE_SEARCH_PATHS
${MKL_ROOT}/include
${INTEL_MKL_ROOT}/include)
set(MKL_LIB_SEARCH_PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64
${INTEL_MKL_ROOT}/lib
${INTEL_MKL_ROOT}/lib/intel64)
find_path(MKL_INC_DIR mkl.h PATHS find_path(MKL_INC_DIR mkl.h PATHS
${MKL_ROOT}/include) ${MKL_INCLUDE_SEARCH_PATHS})
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include) ${MKL_INCLUDE_SEARCH_PATHS})
find_library(MKL_CORE_LIB NAMES mkl_core PATHS find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib ${MKL_LIB_SEARCH_PATHS})
${MKL_ROOT}/lib/intel64)
find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS
${MKL_ROOT}/lib ${MKL_LIB_SEARCH_PATHS})
${MKL_ROOT}/lib/intel64)
find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib ${MKL_LIB_SEARCH_PATHS})
${MKL_ROOT}/lib/intel64)
if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
......
...@@ -67,6 +67,30 @@ else() ...@@ -67,6 +67,30 @@ else()
include_directories(${CUDA_TOOLKIT_INCLUDE}) include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU) endif(NOT WITH_GPU)
if(WITH_MKLDNN)
add_definitions(-DPADDLE_USE_MKLDNN)
if (WITH_MKLML AND MKLDNN_IOMP_DIR)
message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
set(OPENMP_FLAGS "-fopenmp")
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
else()
find_package(OpenMP)
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
else()
message(WARNING "Can not find OpenMP."
"Some performance features in MKLDNN may not be available")
endif()
endif()
endif(WITH_MKLDNN)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
......
...@@ -34,9 +34,15 @@ IF(WITH_TESTING) ...@@ -34,9 +34,15 @@ IF(WITH_TESTING)
"${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE)
ENDIF(WIN32) ENDIF(WIN32)
IF(WITH_MKLML)
# wait for mklml downloading completed
SET(GTEST_DEPENDS ${MKLML_PROJECT})
ENDIF()
ExternalProject_Add( ExternalProject_Add(
extern_gtest extern_gtest
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${GTEST_DEPENDS}
GIT_REPOSITORY "https://github.com/google/googletest.git" GIT_REPOSITORY "https://github.com/google/googletest.git"
GIT_TAG "release-1.8.0" GIT_TAG "release-1.8.0"
PREFIX ${GTEST_SOURCES_DIR} PREFIX ${GTEST_SOURCES_DIR}
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(NOT ${WITH_MKLDNN})
return()
ENDIF(NOT ${WITH_MKLDNN})
INCLUDE(ExternalProject)
SET(MKLDNN_PROJECT "extern_mkldnn")
SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX})
IF(NOT "$ENV{HOME}" STREQUAL "/root")
SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}")
ENDIF()
SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn")
SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
IF(WIN32)
MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet."
"Force WITH_MKLDNN=OFF")
SET(WITH_MKLDNN OFF)
return()
ELSE(WIN32)
SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
#SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})
IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
SET(MKLDNN_DEPENDS ${MKLML_PROJECT})
SET(MKLDNN_MKLROOT ${MKLML_ROOT})
SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB})
SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR})
ENDIF()
ExternalProject_Add(
${MKLDNN_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "v0.9"
PREFIX ${MKLDNN_SOURCES_DIR}
CONFIGURE_COMMAND mkdir -p <SOURCE_DIR>/build
BUILD_COMMAND cd <SOURCE_DIR>/build
&& cmake .. -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT}
&& $(MAKE)
INSTALL_COMMAND cd <SOURCE_DIR>/build && $(MAKE) install
UPDATE_COMMAND ""
)
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY})
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}")
LIST(APPEND external_project_dependencies mkldnn)
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(NOT ${WITH_MKLML})
return()
ENDIF(NOT ${WITH_MKLML})
INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml")
SET(MKLML_VER "mklml_lnx_2018.0.20170425")
SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "opt/paddle/third_party/mklml")
SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}")
IF(NOT "$ENV{HOME}" STREQUAL "/root")
SET(MKLML_INSTALL_ROOT "$ENV{HOME}")
ENDIF()
SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER})
SET(MKLML_INC_DIR ${MKLML_ROOT}/include)
SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt)
FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}\n"
" DESTINATION ${MKLML_DST_DIR})\n")
ExternalProject_Add(
${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL}
&& tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
)
ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
LIST(APPEND external_project_dependencies mklml)
...@@ -124,6 +124,7 @@ set(GPU_COMMON_FLAGS ...@@ -124,6 +124,7 @@ set(GPU_COMMON_FLAGS
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=unused-local-typedefs -Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=unused-function # Warnings in Numpy Header.
-Wno-error=array-bounds # Warnings in Eigen::array
) )
if (APPLE) if (APPLE)
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main package main
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main package main
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package connection package connection
import ( import (
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(WITH_TESTING) if(WITH_TESTING)
go_test(master_test) go_test(master_test)
endif() endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
go_library(paddle_master SHARED DEPS paddle_go_optimizer) go_library(paddle_master SHARED DEPS paddle_go_optimizer)
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main package main
/* /*
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master_test package master_test
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import "sync" import "sync"
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package master package master
import "testing" import "testing"
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(WITH_TESTING) if(WITH_TESTING)
go_test(pserver_test DEPS paddle_go_optimizer) go_test(pserver_test DEPS paddle_go_optimizer)
endif() endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(WITH_TESTING) if(WITH_TESTING)
go_test(pserver_client_test DEPS paddle_go_optimizer) go_test(pserver_client_test DEPS paddle_go_optimizer)
endif() endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf)
target_link_libraries(paddle_go_optimizer stdc++ m) target_link_libraries(paddle_go_optimizer stdc++ m)
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main package main
/* /*
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)
add_style_check_target(test_cclient test_cclient.c) add_style_check_target(test_cclient test_cclient.c)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client package client
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client_test package client_test
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client package client
import ( import (
...@@ -66,10 +80,10 @@ func (p *EtcdClient) List() []Server { ...@@ -66,10 +80,10 @@ func (p *EtcdClient) List() []Server {
for { for {
for i := 0; i < psDesired; i++ { for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout) ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
cancel()
psKey := pserver.PsPath + strconv.Itoa(i) psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey) log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey) resp, err := p.client.Get(ctx, psKey)
cancel()
if err != nil { if err != nil {
log.Infof("Get psKey= %s error, %v", psKey, err) log.Infof("Get psKey= %s error, %v", psKey, err)
time.Sleep(p.timeout) time.Sleep(p.timeout)
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pserver package pserver
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pserver package pserver
// #cgo CFLAGS: -I ../../ // #cgo CFLAGS: -I ../../
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pserver package pserver
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pserver package pserver
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pserver_test package pserver_test
import ( import (
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if(WITH_TESTING) if(WITH_TESTING)
go_test(network_helper_test) go_test(network_helper_test)
endif() endif()
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package networkhelper package networkhelper
import ( import (
......
// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package networkhelper package networkhelper
import "testing" import "testing"
......
...@@ -65,14 +65,15 @@ template <typename T, int MajorType = Eigen::RowMajor, ...@@ -65,14 +65,15 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> { struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten is to reshape a Tensor into a one dimension EigenVector // Flatten is to reshape a Tensor into a one dimension EigenVector
static typename EigenTensor<T, 1>::Type Flatten(Tensor& tensor) { using Parent = EigenTensor<T, 1, MajorType, IndexType>;
return EigenTensor<T, 1>::From( static typename Parent::Type Flatten(Tensor& tensor) {
tensor, make_ddim({static_cast<int>(product(tensor.dims_))})); return Parent::From(tensor,
make_ddim({static_cast<int>(product(tensor.dims_))}));
} }
static typename EigenTensor<T, 1>::ConstType Flatten(const Tensor& tensor) { static typename Parent::ConstType Flatten(const Tensor& tensor) {
return EigenTensor<T, 1>::From( return Parent::From(tensor,
tensor, make_ddim({static_cast<int>(product(tensor.dims_))})); make_ddim({static_cast<int>(product(tensor.dims_))}));
} }
}; };
......
...@@ -48,25 +48,27 @@ class Tensor { ...@@ -48,25 +48,27 @@ class Tensor {
template <typename T> template <typename T>
const T* data() const { const T* data() const {
CheckDims<T>(); EnforceSufficientMemory<T>();
return reinterpret_cast<const T*>( return reinterpret_cast<const T*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_); reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
} }
template <typename T> template <typename T>
T* data() { T* data() {
CheckDims<T>(); EnforceSufficientMemory<T>();
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_); offset_);
} }
template <typename T> template <typename T, // must be POD types
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
T* mutable_data(DDim dims, platform::Place place) { T* mutable_data(DDim dims, platform::Place place) {
set_dims(dims); Resize(dims);
return mutable_data<T>(place); return mutable_data<T>(place);
} }
template <typename T> template <typename T, // must be POD types
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
T* mutable_data(platform::Place place) { T* mutable_data(platform::Place place) {
PADDLE_ENFORCE(product(dims_) > 0, PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call " "Tensor's numel must be larger than zero to call "
...@@ -95,11 +97,9 @@ class Tensor { ...@@ -95,11 +97,9 @@ class Tensor {
} }
template <typename T> template <typename T>
void ShareDataFrom(const Tensor& src) { void ShareDataWith(const Tensor& src) {
src.CheckDims<T>(); src.EnforceSufficientMemory<T>();
holder_ = src.holder_; *this = src;
set_dims(src.dims());
offset_ = src.offset_;
} }
template <typename T> template <typename T>
...@@ -107,9 +107,9 @@ class Tensor { ...@@ -107,9 +107,9 @@ class Tensor {
PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
platform::is_cpu_place(dst_place), platform::is_cpu_place(dst_place),
"Tensor::CopyFrom only support CPU now."); "Tensor::CopyFrom only support CPU now.");
src.CheckDims<T>(); src.EnforceSufficientMemory<T>();
size_t size = product(src.dims_) * sizeof(T); size_t size = product(src.dims_) * sizeof(T);
set_dims(src.dims()); Resize(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>()); const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place)); void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
memcpy(dst_ptr, src_ptr, size); memcpy(dst_ptr, src_ptr, size);
...@@ -117,34 +117,25 @@ class Tensor { ...@@ -117,34 +117,25 @@ class Tensor {
template <typename T> template <typename T>
Tensor Slice(const int& begin_idx, const int& end_idx) const { Tensor Slice(const int& begin_idx, const int& end_idx) const {
CheckDims<T>(); EnforceSufficientMemory<T>();
PADDLE_ENFORCE(begin_idx >= 0 && end_idx <= dims_[0], PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
"Slice index is less than zero or out of bound."); PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx, PADDLE_ENFORCE(begin_idx < end_idx,
"Begin index must be less than end index."); "Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
std::vector<int> d = vectorize(dims_); int base = product(dims_) / dims_[0];
int base = 1;
for (size_t i = 1; i < d.size(); ++i) {
base *= d[i];
}
Tensor dst; Tensor dst;
dst.holder_ = holder_; dst.holder_ = holder_;
DDim dst_dims = dims_; DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx; dst_dims[0] = end_idx - begin_idx;
dst.set_dims(dst_dims); dst.Resize(dst_dims);
dst.offset_ = offset_ + begin_idx * base * sizeof(T); dst.offset_ = offset_ + begin_idx * base * sizeof(T);
return dst; return dst;
} }
void set_dims(const DDim& dims) { void Resize(const DDim& dims) { dims_ = dims; }
if (dims == dims_) {
return;
}
dims_ = dims;
}
DDim dims() const { return dims_; } const DDim& dims() const { return dims_; }
private: private:
// Placeholder hides type T, so it doesn't appear as a template // Placeholder hides type T, so it doesn't appear as a template
...@@ -159,21 +150,9 @@ class Tensor { ...@@ -159,21 +150,9 @@ class Tensor {
template <typename T, typename PlaceType> template <typename T, typename PlaceType>
struct PlaceholderImpl : public Placeholder { struct PlaceholderImpl : public Placeholder {
private:
template <typename PType>
class Deleter {
public:
Deleter(PType place) : place_(place) {}
void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
private:
PType place_;
};
public:
PlaceholderImpl(PlaceType place, size_t size) PlaceholderImpl(PlaceType place, size_t size)
: ptr_(static_cast<T*>(memory::Alloc(place, size)), : ptr_(static_cast<T*>(memory::Alloc(place, size)),
Deleter<PlaceType>(place)), memory::PODDeleter<T, PlaceType>(place)),
place_(place), place_(place),
size_(size) {} size_(size) {}
...@@ -182,13 +161,13 @@ class Tensor { ...@@ -182,13 +161,13 @@ class Tensor {
virtual paddle::platform::Place place() const { return place_; } virtual paddle::platform::Place place() const { return place_; }
virtual std::type_index type() const { return std::type_index(typeid(T)); } virtual std::type_index type() const { return std::type_index(typeid(T)); }
std::unique_ptr<T, Deleter<PlaceType>> ptr_; std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_;
platform::Place place_; // record the place of ptr_. platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block. size_t size_; // size of the memory block.
}; };
template <typename T> template <typename T>
inline void CheckDims() const { inline void EnforceSufficientMemory() const {
PADDLE_ENFORCE(holder_ != nullptr, PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first."); "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
...@@ -198,7 +177,11 @@ class Tensor { ...@@ -198,7 +177,11 @@ class Tensor {
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated. std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_; DDim dims_;
size_t offset_; // marks the begin of tensor data area. // A PlaceHolder may be shared by more than one tensor. Some of them may be
// slices of the others. So the offset_ is introduced here to indicate the
// byte offset between PlaceHolder::ptr_ and where tensor's data really
// begins.
size_t offset_;
}; };
} // namespace framework } // namespace framework
......
...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) { ...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
Tensor tt; Tensor tt;
tt.set_dims(make_ddim({2, 3, 4})); tt.Resize(make_ddim({2, 3, 4}));
DDim dims = tt.dims(); DDim dims = tt.dims();
ASSERT_EQ(arity(dims), 3); ASSERT_EQ(arity(dims), 3);
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
...@@ -97,7 +97,7 @@ TEST(Tensor, MutableData) { ...@@ -97,7 +97,7 @@ TEST(Tensor, MutableData) {
#endif #endif
} }
TEST(Tensor, ShareDataFrom) { TEST(Tensor, ShareDataWith) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
{ {
...@@ -106,7 +106,7 @@ TEST(Tensor, ShareDataFrom) { ...@@ -106,7 +106,7 @@ TEST(Tensor, ShareDataFrom) {
// Try to share data form uninitialized tensor // Try to share data form uninitialized tensor
bool caught = false; bool caught = false;
try { try {
dst_tensor.ShareDataFrom<float>(src_tensor); dst_tensor.ShareDataWith<float>(src_tensor);
} catch (std::runtime_error& err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = std::string msg =
...@@ -119,7 +119,7 @@ TEST(Tensor, ShareDataFrom) { ...@@ -119,7 +119,7 @@ TEST(Tensor, ShareDataFrom) {
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), CPUPlace()); src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), CPUPlace());
dst_tensor.ShareDataFrom<int>(src_tensor); dst_tensor.ShareDataWith<int>(src_tensor);
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>()); ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
} }
...@@ -128,7 +128,7 @@ TEST(Tensor, ShareDataFrom) { ...@@ -128,7 +128,7 @@ TEST(Tensor, ShareDataFrom) {
Tensor src_tensor; Tensor src_tensor;
Tensor dst_tensor; Tensor dst_tensor;
src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), GPUPlace()); src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), GPUPlace());
dst_tensor.ShareDataFrom<int>(src_tensor); dst_tensor.ShareDataWith<int>(src_tensor);
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>()); ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
} }
#endif #endif
......
...@@ -31,13 +31,22 @@ public: ...@@ -31,13 +31,22 @@ public:
ConvolutionTest(const std::string& conv1, ConvolutionTest(const std::string& conv1,
const std::string& conv2, const std::string& conv2,
TestType type, TestType type,
bool useGroups = true,
std::string algo = "auto") { std::string algo = "auto") {
for (size_t batchSize : {1, 32}) { for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) { for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {1, 3, 5}) { for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) { for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64, 128}) { for (size_t outputChannels : {3, 64}) {
if (inputChannels < outputChannels) break; if (inputChannels > outputChannels) break;
size_t groups;
if (!useGroups) {
groups = 1;
} else {
if (outputChannels % inputChannels != 0) continue;
groups = inputChannels;
}
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
if (padding >= filterSize) break; if (padding >= filterSize) break;
...@@ -62,13 +71,24 @@ public: ...@@ -62,13 +71,24 @@ public:
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", groups)
.set("algo", algo)); .set("algo", algo));
TensorShape input{ TensorShape input{
batchSize, inputChannels, inputSize, inputSize}; batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize}; TensorShape filter;
if (groups > 1)
filter = TensorShape({groups,
outputChannels / groups,
inputChannels / groups,
filterSize,
filterSize});
else
filter = TensorShape({outputChannels,
inputChannels,
filterSize,
filterSize});
TensorShape output{ TensorShape output{
batchSize, outputChannels, outputSize, outputSize}; batchSize, outputChannels, outputSize, outputSize};
...@@ -85,7 +105,8 @@ public: ...@@ -85,7 +105,8 @@ public:
} else if (type == kBackwardFilterTest) { } else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter)); test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter),
ADD_TO);
test.run(); test.run();
} }
} }
...@@ -106,6 +127,7 @@ public: ...@@ -106,6 +127,7 @@ public:
ConvolutionTest2(const std::string& conv1, ConvolutionTest2(const std::string& conv1,
const std::string& conv2, const std::string& conv2,
TestType type, TestType type,
bool useGroups = true,
std::string algo = "auto") { std::string algo = "auto") {
for (size_t batchSize : {16}) { for (size_t batchSize : {16}) {
for (size_t inputHeight : {7, 31}) { for (size_t inputHeight : {7, 31}) {
...@@ -113,7 +135,15 @@ public: ...@@ -113,7 +135,15 @@ public:
for (size_t filterHeight : {1, 5}) { for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) { for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) { for (size_t inputChannels : {7}) {
for (size_t outputChannels : {32}) { for (size_t outputChannels : {7}) {
size_t groups;
if (!useGroups) {
groups = 1;
} else {
if (outputChannels % inputChannels != 0) continue;
groups = inputChannels;
}
size_t stride = 1; size_t stride = 1;
size_t padding = 0; size_t padding = 0;
size_t outputHeight = size_t outputHeight =
...@@ -141,13 +171,24 @@ public: ...@@ -141,13 +171,24 @@ public:
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", groups)
.set("algo", algo)); .set("algo", algo));
TensorShape input{ TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth}; batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
outputChannels, inputChannels, filterHeight, filterWidth}; TensorShape filter;
if (groups > 1)
filter = TensorShape({groups,
outputChannels / groups,
inputChannels / groups,
filterHeight,
filterWidth});
else
filter = TensorShape({outputChannels,
inputChannels,
filterHeight,
filterWidth});
TensorShape output{ TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth}; batchSize, outputChannels, outputHeight, outputWidth};
...@@ -164,7 +205,8 @@ public: ...@@ -164,7 +205,8 @@ public:
} else if (type == kBackwardFilterTest) { } else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter)); test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter),
ADD_TO);
test.run(); test.run();
} }
} }
...@@ -177,34 +219,88 @@ public: ...@@ -177,34 +219,88 @@ public:
} }
}; };
// ======Start Convolution TEST======
TEST(Forward, GEMM) { TEST(Forward, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest); "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest); "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(Forward, GEMM2) { TEST(Forward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest); "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest); "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false);
} }
TEST(BackwardInput, GEMM) { TEST(BackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); "GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
kBackwardInputTest,
false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); "GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
kBackwardInputTest,
false);
} }
TEST(BackwardFilter, GEMM) { TEST(BackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); "GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
kBackwardFilterTest,
false);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); "GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
kBackwardFilterTest,
false);
} }
#endif #endif
// ======End Convolution TEST======
// ======Start DepthwiseConvolution TEST======
// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu
// version of depthwiseConv is implemented.
#ifndef PADDLE_ONLY_CPU
TEST(DepthwiseConvForward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest);
}
TEST(DepthwiseConvBackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU",
"DepthwiseConvGradInput-GPU",
kBackwardInputTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU",
"DepthwiseConvGradInput-GPU",
kBackwardInputTest);
}
TEST(DepthwiseConvBackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU",
"DepthwiseConvGradFilter-GPU",
kBackwardFilterTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU",
"DepthwiseConvGradFilter-GPU",
kBackwardFilterTest);
}
#endif
// ======End DepthwiseConvolution TEST======
} // namespace paddle } // namespace paddle
...@@ -36,10 +36,11 @@ __global__ void KeCrop(real* outputs, const real* inputs, ...@@ -36,10 +36,11 @@ __global__ void KeCrop(real* outputs, const real* inputs,
template <> template <>
void Crop<DEVICE_TYPE_GPU>(real* outputs, void Crop<DEVICE_TYPE_GPU>(real* outputs,
const real* inputs, const real* inputs,
const TensorShape inShape, const TensorShape inShape,
const TensorShape outShape, const TensorShape outShape,
const FuncConfig& conf) { const FuncConfig& conf) {
std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner"); std::vector<uint32_t> crop_corner =
conf.get<std::vector<uint32_t>>("crop_corner");
int cropC = crop_corner[1]; int cropC = crop_corner[1];
int cropH = crop_corner[2]; int cropH = crop_corner[2];
int cropW = crop_corner[3]; int cropW = crop_corner[3];
...@@ -74,7 +75,8 @@ __global__ void KeCropDiff(const real* inGrad, real* outGrad, ...@@ -74,7 +75,8 @@ __global__ void KeCropDiff(const real* inGrad, real* outGrad,
const int c = (idx / inW / inH) % inC; const int c = (idx / inW / inH) % inC;
const int n = idx / inW / inH / inC; const int n = idx / inW / inH / inC;
const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; const int off =
((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w;
outGrad[off] += inGrad[idx]; outGrad[off] += inGrad[idx];
} }
...@@ -86,7 +88,8 @@ void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad, ...@@ -86,7 +88,8 @@ void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad,
const TensorShape inShape, const TensorShape inShape,
const TensorShape outShape, const TensorShape outShape,
const FuncConfig& conf) { const FuncConfig& conf) {
std::vector<uint32_t> crop_corner = conf.get<std::vector<uint32_t>>("crop_corner"); std::vector<uint32_t> crop_corner =
conf.get<std::vector<uint32_t>>("crop_corner");
int cropC = crop_corner[1]; int cropC = crop_corner[1];
int cropH = crop_corner[2]; int cropH = crop_corner[2];
int cropW = crop_corner[3]; int cropW = crop_corner[3];
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DepthwiseConvOp.h"
#include "ConvOp.h"
#include "GemmFunctor.h"
namespace paddle {
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* outputData) {
// TODO(zhaolong) : cpu implementation of depthwise convolution
}
};
template <class T>
class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* inputGrad) {}
// TODO(zhaolong) : cpu implementation of depthwise convolution
};
template <class T>
class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* colData,
T* filterGrad) {}
// TODO(zhaolong) : cpu implementation of depthwise convolution
};
/*
* \brief Forward calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
size_t filterMultiplier = outputChannels / groups_;
CHECK_EQ(inputChannels, groups_);
real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>();
DepthwiseConvFunctor<Device, real> depthwiseConv;
depthwiseConv(inputData,
filterData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
outputData);
}
};
/*
* \brief Backward input calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvGradInputFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
check(inputs, outputs);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
size_t filterMultiplier = outputChannels / groups_;
CHECK_EQ(inputChannels, groups_);
real* outputGrad = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* inputGrad = outputs[0].data<real>();
DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput;
depthwiseConvGradInput(outputGrad,
filterData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
inputGrad);
}
};
/*
* \brief Backward filter calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvGradFilterFunction : public ConvFunctionBase {
public:
void init(const FuncConfig& config) override {
ConvFunctionBase::init(config);
}
void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
check(inputs, outputs);
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
size_t batchSize = input[0];
size_t inputChannels = input[1];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
size_t outputHeight = output[2];
size_t outputWidth = output[3];
size_t filterMultiplier = outputChannels / groups_;
CHECK_EQ(inputChannels, groups_);
real* outputGrad = inputs[0].data<real>();
real* inputData = inputs[1].data<real>();
real* filterGrad = outputs[0].data<real>();
int size = outputChannels * filterHeight * filterWidth * outputHeight *
outputWidth;
resizeBuffer<Device>(size);
real* colData = reinterpret_cast<real*>(memory_->getBuf());
DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter;
depthwiseConvGradFilter(outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
colData,
filterGrad);
}
};
REGISTER_TYPED_FUNC(DepthwiseConv, CPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
CPU,
DepthwiseConvGradInputFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
CPU,
DepthwiseConvGradFilterFunction);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
GPU,
DepthwiseConvGradInputFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
GPU,
DepthwiseConvGradFilterFunction);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "TensorType.h"
namespace paddle {
/**
*\brief Depthwise convolution forward. The outputData
* of depthwise convolution is same with ExpandConvLayer
* when groups equals inputChannels in ExpandConvLayer.
*
* \param[in] inputData input data.
* \param[in] filterData the Paramters of the depthwise conv layer..
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of inputData.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData..
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth widht of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] outputData outputData.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvFunctor {
public:
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* outputData);
};
/**
*\brief Functor tot compute the depthwise convolution backprop w.r.t input.
*
*
* \param[in] outputGradData the grad data of output.
* \param[in] filterData the Paramters of the depthwise conv layer..
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth widht of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] inputGrad the grad data of input.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvGradInputFunctor {
public:
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* inputGrad);
};
/**
*\brief Functor tot compute the depthwise convolution backprop w.r.t filter.
*
* \param[in] outputGradData the grad data of output.
* \param[in] inputData inputData.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth widht of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[in] colData Auxiliary data when calculating filterGrad.
* \param[in] multiplierData Auxiliary data when calculating filterGrad.
* \param[out] filterGrad the grad data of filter.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvGradFilterFunctor {
public:
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* colData,
T* filterGrad);
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DepthwiseConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/BaseMatrix.h"
namespace paddle {
// CUDA kernel to compute the depthwise convolution forward pass
template <class T>
__global__
void ConvolutionDepthwiseForward(const int nthreads,
const T* const inputData, const T* const filterData,
const int batchSize, const int outputChannels, const int outputHeight,
const int outputWidth, const int inputChannels, const int inputHeight,
const int inputWidth, const int filterMultiplier, const int filterHeight,
const int filterWidth, const int strideH, const int strideW,
const int paddingH, const int paddingW, T* const outputData) {
int index =
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
if (index < nthreads) {
const int batch = index / outputChannels / outputHeight / outputWidth;
const int c_out = (index / outputHeight / outputWidth) % outputChannels;
const int h_out = (index / outputWidth) % outputHeight;
const int w_out = index % outputWidth;
const int c_in = c_out / filterMultiplier;
const T* weight = filterData + c_out * filterHeight * filterWidth;
T value = 0;
const int h_in_start = -paddingH + h_out * strideH;
const int w_in_start = -paddingW + w_out * strideW;
const int h_in_end = -paddingH + h_out * strideH + filterHeight - 1;
const int w_in_end = -paddingW + w_out * strideW + filterWidth - 1;
if ((h_in_start >= 0) && (h_in_end < inputHeight)
&& (w_in_start >= 0) && (w_in_end < inputWidth)) {
for (int kh = 0; kh < filterHeight; ++kh) {
for (int kw = 0; kw < filterWidth; ++kw) {
const int h_in = -paddingH + h_out * strideH + kh;
const int w_in = -paddingW + w_out * strideW + kw;
const int offset = ((batch * inputChannels + c_in)
* inputHeight + h_in) * inputWidth + w_in;
value += (*weight) * inputData[offset];
++weight;
}
}
} else {
for (int kh = 0; kh < filterHeight; ++kh) {
for (int kw = 0; kw < filterWidth; ++kw) {
const int h_in = -paddingH + h_out * strideH + kh;
const int w_in = -paddingW + w_out * strideW + kw;
if ((h_in >= 0) && (h_in < inputHeight)
&& (w_in >= 0) && (w_in < inputWidth)) {
const int offset = ((batch * inputChannels + c_in)
* inputHeight + h_in) * inputWidth + w_in;
value += (*weight) * inputData[offset];
}
++weight;
}
}
}
outputData[index] = value;
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template <class T>
__global__
void ConvolutionDepthwiseInputBackward(const int nthreads,
const T* const top_diff, const T* const weight_data,
const int num, const int outputChannels, const int outputHeight,
const int outputWidth, const int inputChannels, const int inputHeight,
const int inputWidth, const int filterMultiplier, const int filterHeight,
const int filterWidth, const int strideH, const int strideW,
const int paddingH, const int paddingW, T* const bottom_diff) {
int index =
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
if (index < nthreads) {
const int batch = index / inputChannels / inputHeight / inputWidth;
const int c_in = (index / inputHeight / inputWidth) % inputChannels;
const int h_in = (index / inputWidth) % inputHeight;
const int w_in = index % inputWidth;
const int c_out_start = c_in * filterMultiplier;
int h_out_start = (h_in - filterHeight + paddingH + strideH)/strideH;
h_out_start = 0 > h_out_start ? 0 : h_out_start;
int h_out_end = (h_in + paddingH)/strideH;
h_out_end = outputHeight - 1 < h_out_end? outputHeight - 1 : h_out_end;
int w_out_start = (w_in - filterWidth + paddingW + strideW)/strideW;
w_out_start = 0 > w_out_start ? 0 : w_out_start;
int w_out_end = (w_in + paddingW)/strideW;
w_out_end = outputWidth - 1 < w_out_end? outputWidth - 1 : w_out_end;
T value = 0;
for (int c_out = c_out_start;
c_out < c_out_start + filterMultiplier; c_out ++) {
for (int h_out = h_out_start; h_out <= h_out_end; ++h_out) {
const int filter_h = h_in + paddingH - h_out * strideH;
for (int w_out = w_out_start; w_out <= w_out_end; ++w_out) {
const int filter_w = w_in + paddingW - w_out * strideW;
const int filter_offset = c_out * filterHeight * filterWidth
+ filter_h * filterWidth + filter_w;
const int top_diff_offset = ((batch * outputChannels + c_out) *
outputHeight + h_out)* outputWidth + w_out;
value += top_diff[top_diff_offset] * weight_data[filter_offset];
}
}
}
bottom_diff[index] += value;
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
template <class T>
__global__
void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
const T* const top_diff, const T* const inputData,
const int num, const int outputChannels, const int outputHeight,
const int outputWidth, const int inputChannels, const int inputHeight,
const int inputWidth, const int filterMultiplier, const int filterHeight,
const int filterWidth, const int strideH, const int strideW,
const int paddingH, const int paddingW, T* const buffer_data) {
int index =
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
if (index < nthreads) {
const int h_out = (index / outputWidth) % outputHeight;
const int w_out = index % outputWidth;
const int kh = (index / filterWidth / outputHeight / outputWidth)
% filterHeight;
const int kw = (index / outputHeight / outputWidth) % filterWidth;
const int h_in = -paddingH + h_out * strideH + kh;
const int w_in = -paddingW + w_out * strideW + kw;
if ((h_in >= 0) && (h_in < inputHeight)
&& (w_in >= 0) && (w_in < inputWidth)) {
const int c_out = index /
(filterHeight * filterWidth * outputHeight * outputWidth);
const int c_in = c_out / filterMultiplier;
const int batch = num_i;
const int top_offset = ((batch * outputChannels + c_out) *
outputHeight + h_out) * outputWidth + w_out;
const int bottom_offset = ((batch * inputChannels + c_in)
* inputHeight + h_in) * inputWidth + w_in;
buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset];
} else {
buffer_data[index] = 0;
}
}
}
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_GPU, T>{
public:
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* outputData){
int outputSize = batchSize * outputChannels * outputHeight * outputWidth;
size_t blocks = (outputSize + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
ConvolutionDepthwiseForward<T>
<<< grid, threads, 0, STREAM_DEFAULT >>>(
outputSize,
inputData,
filterData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH,
strideW,
paddingH,
paddingW,
outputData);
}
};
template <class T>
class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, T>{
public:
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* inputGrad){
int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
size_t blocks = (inputSize + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
ConvolutionDepthwiseInputBackward<T>
// NOLINT_NEXT_LINE(whitespace/operators)
<<< grid, threads, 0, STREAM_DEFAULT >>>(
inputSize,
outputGrad,
filterData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH,
strideW,
paddingH,
paddingW,
inputGrad);
}
};
template <class T>
class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterMultiplier,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* colData,
T* filterGrad){
int colDataSize = outputChannels * filterHeight * filterWidth
* outputHeight * outputWidth;
size_t blocks = (colDataSize + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
BaseMatrix filterGradMatrix(outputChannels * filterHeight * filterWidth,
1, filterGrad, false, true);
for (int i = 0; i < batchSize; i++) {
ConvolutionDepthwiseFilterBackward<T>
<<< grid, threads, 0, STREAM_DEFAULT >>>(
i,
colDataSize,
outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterMultiplier,
filterHeight,
filterWidth,
strideH,
strideW,
paddingH,
paddingW,
colData);
int K = outputHeight * outputWidth;
int M = colDataSize / K;
BaseMatrix colMatrix(M, K, colData, false, true);
filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0);
}
}
};
#ifdef PADDLE_TYPE_DOUBLE
template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, double>;
template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, double>;
template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, double>;
#else
template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, float>;
template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, float>;
template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, float>;
#endif
} // namespace paddle
...@@ -38,10 +38,25 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -38,10 +38,25 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
inputShape_.resize(numInputs); inputShape_.resize(numInputs);
filterShape_.resize(numInputs); filterShape_.resize(numInputs);
outputShape_.resize(numInputs); outputShape_.resize(numInputs);
std::string convType;
std::string convGradInputType;
std::string convGradFilterType;
for (int i = 0; i < config_.inputs_size(); i++) { for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
if (useGpu_ && (size_t)groups_[i] == (size_t)channels_[i] && !isDeconv_) {
convType = "DepthwiseConv";
convGradInputType = "DepthwiseConvGradInput";
convGradFilterType = "DepthwiseConvGradFilter";
} else {
convType = "GemmConv";
convGradInputType = "GemmConvGradInput";
convGradFilterType = "GemmConvGradFilter";
}
if (FLAGS_use_nnpack) { if (FLAGS_use_nnpack) {
CHECK_EQ(isDeconv_, false); CHECK_EQ(isDeconv_, false);
createFunction(forward_, createFunction(forward_,
...@@ -53,21 +68,21 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -53,21 +68,21 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
.set("algo", std::string("auto"))); .set("algo", std::string("auto")));
} else { } else {
createFunction(forward_, createFunction(forward_,
!isDeconv_ ? "GemmConv" : "GemmConvGradInput", !isDeconv_ ? convType : convGradInputType,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
!isDeconv_ ? "GemmConvGradInput" : "GemmConv", !isDeconv_ ? convGradInputType : convType,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
"GemmConvGradFilter", convGradFilterType,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
......
...@@ -347,6 +347,55 @@ TEST(Layer, CosSimVecMatLayer) { ...@@ -347,6 +347,55 @@ TEST(Layer, CosSimVecMatLayer) {
} }
} }
void testDepthwiseConvLayer(const string& type, bool useGpu) {
TestConfig config;
config.biasSize = 32;
config.layerConfig.set_type(type);
config.layerConfig.set_num_filters(32);
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192});
LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(3);
conv->set_channels(16);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_groups(16);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16);
conv->set_img_size_y(8);
conv->set_output_x(outputSize(conv->img_size(),
conv->filter_size(),
conv->padding(),
conv->stride(),
/* caffeMode */ true));
conv->set_output_y(outputSize(conv->img_size_y(),
conv->filter_size_y(),
conv->padding_y(),
conv->stride_y(),
/* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_y() *
config.layerConfig.num_filters());
testLayerGrad(config, "depthwise_conv", 100, false, useGpu);
// Use small batch_size and useWeight=true to test biasGrad
testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02);
}
TEST(Layer, depthwiseConvLayer) {
// 'depthwise_conv' is a sepecial case of 'exconv' whose
// groups size equals to the input channels size.
testDepthwiseConvLayer("exconv", /* useGpu= */ false);
#ifndef PADDLE_ONLY_CPU
testDepthwiseConvLayer("exconv", /* useGpu= */ true);
#endif
}
void testConvLayer(const string& type, bool trans, bool useGpu) { void testConvLayer(const string& type, bool trans, bool useGpu) {
TestConfig config; TestConfig config;
config.biasSize = 16; config.biasSize = 16;
......
...@@ -202,7 +202,7 @@ double dotProduct<double>(const int n, const double* x, const double* y) { ...@@ -202,7 +202,7 @@ double dotProduct<double>(const int n, const double* x, const double* y) {
return cblas_ddot(n, x, 1, y, 1); return cblas_ddot(n, x, 1, y, 1);
} }
#ifdef PADDLE_USE_MKL #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML)
template <> template <>
void vExp<float>(const int n, const float* a, float* r) { void vExp<float>(const int n, const float* a, float* r) {
...@@ -243,7 +243,55 @@ template <> ...@@ -243,7 +243,55 @@ template <>
void vAdd<double>(const int n, const double* a, const double* b, double* r) { void vAdd<double>(const int n, const double* a, const double* b, double* r) {
vdAdd(n, a, b, r); vdAdd(n, a, b, r);
} }
#else
DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a));
template <class T>
void vExp(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>(
binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a));
template <class T>
void vLog(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>(
binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p));
template <class T>
void vPow(const int n, const T* a, const T b, T* r) {
hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>(
binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b);
template <class T>
void vAdd(const int n, const T* a, const T* b, T* r) {
hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(),
const_cast<T*>(a),
const_cast<T*>(b),
r,
1,
n,
n,
n,
n);
}
template void vExp(const int n, const float* a, float* r);
template void vExp(const int n, const double* a, double* r);
template void vLog(const int n, const float* a, float* r);
template void vLog(const int n, const double* a, double* r);
template void vPow(const int n, const float* a, const float b, float* r);
template void vPow(const int n, const double* a, const double b, double* r);
template void vAdd(const int n, const float* a, const float* b, float* r);
template void vAdd(const int n, const double* a, const double* b, double* r);
#endif
#ifdef PADDLE_USE_MKL
template <> template <>
void vInvSqrt<float>(const int n, const float* a, float* r) { void vInvSqrt<float>(const int n, const float* a, float* r) {
vsInvSqrt(n, a, r); vsInvSqrt(n, a, r);
...@@ -275,20 +323,6 @@ void vTanh<double>(const int n, const double* a, double* r) { ...@@ -275,20 +323,6 @@ void vTanh<double>(const int n, const double* a, double* r) {
} }
#else #else
DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a));
template <class T>
void vExp(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vExp<T>, 0, 0>(
binary::vExp<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a));
template <class T>
void vLog(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vLog<T>, 0, 0>(
binary::vLog<T>(), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a)); DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a));
template <class T> template <class T>
void vInvSqrt(const int n, const T* a, T* r) { void vInvSqrt(const int n, const T* a, T* r) {
...@@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) { ...@@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) {
binary::vTanh<T>(), const_cast<T*>(a), r, 1, n, n, n); binary::vTanh<T>(), const_cast<T*>(a), r, 1, n, n, n);
} }
DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p));
template <class T>
void vPow(const int n, const T* a, const T b, T* r) {
hl_cpu_apply_binary_op<T, binary::vPow<T>, 0, 0>(
binary::vPow<T>(b), const_cast<T*>(a), r, 1, n, n, n);
}
DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b);
template <class T>
void vAdd(const int n, const T* a, const T* b, T* r) {
hl_cpu_apply_ternary_op<T, ternary::vAdd<T>, 0, 0>(ternary::vAdd<T>(),
const_cast<T*>(a),
const_cast<T*>(b),
r,
1,
n,
n,
n,
n);
}
template void vExp(const int n, const float* a, float* r);
template void vExp(const int n, const double* a, double* r);
template void vLog(const int n, const float* a, float* r);
template void vLog(const int n, const double* a, double* r);
template void vInvSqrt(const int n, const double* a, double* r); template void vInvSqrt(const int n, const double* a, double* r);
template void vInvSqrt(const int n, const float* a, float* r); template void vInvSqrt(const int n, const float* a, float* r);
template void vLog1p(const int n, const float* a, float* r); template void vLog1p(const int n, const float* a, float* r);
template void vLog1p(const int n, const double* a, double* r); template void vLog1p(const int n, const double* a, double* r);
template void vTanh(const int n, const float* a, float* r); template void vTanh(const int n, const float* a, float* r);
template void vTanh(const int n, const double* a, double* r); template void vTanh(const int n, const double* a, double* r);
template void vPow(const int n, const float* a, const float b, float* r);
template void vPow(const int n, const double* a, const double b, double* r);
template void vAdd(const int n, const float* a, const float* b, float* r);
template void vAdd(const int n, const double* a, const double* b, double* r);
#endif #endif
......
...@@ -15,6 +15,12 @@ limitations under the License. */ ...@@ -15,6 +15,12 @@ limitations under the License. */
#ifndef MATHFUNCTIONS_H_ #ifndef MATHFUNCTIONS_H_
#define MATHFUNCTIONS_H_ #define MATHFUNCTIONS_H_
#ifdef PADDLE_USE_MKLML
#include <mkl_cblas.h>
#include <mkl_lapacke.h>
#include <mkl_vml_functions.h>
#endif
#ifdef PADDLE_USE_MKL #ifdef PADDLE_USE_MKL
#include <mkl.h> #include <mkl.h>
#include <mkl_lapacke.h> #include <mkl_lapacke.h>
......
## Design # Region-based Heterogeneous Memory Management
### Usage Please check out the [design documentation](http://gangliao.me) to find out more details about
buddy memory allocator for both CPU and GPU.
To allocate 4KB CPU memory:
```cpp
p = memory::Alloc(platform::CPUPlace(), 4*1024);
```
To allocate 4KB memory on the 3rd GPU:
```cpp
p = memory::Alloc(platform::GPUPlace(2), 4*1024);
```
To free memory and check the so-far used amount of memory on a place:
```cpp
auto pl = platform::GPUPlace(0);
p = memory::Alloc(pl, 4*1024);
cout << memory::Used(pl);
memory::Free(pl, p);
```
### API
In `paddle/memory/memory.h` we have:
```cpp
namespace memory {
template <typename Place> void* Alloc(Place, size_t);
template <typename Place> void Free(Place, void*);
template <typename Place> size_t Used(Place);
} // namespace memory
```
These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`:
```cpp
template<>
void* Alloc<CPUPlace>(CPUPlace p, size_t size) {
return GetCPUBuddyAllocator()->Alloc(size);
}
```
and
```cpp
template<>
void Alloc<GPUPlace>(GPUPlace p, size_t size) {
return GetGPUBuddyAllocator(p.id)->Alloc(size);
}
```
Similar specializations exist for `Free` and `Used`.
### Implementation
`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletions.
```cpp
BuddyAllocator* GetCPUBuddyAllocator() {
static BuddyAllocator* a = NULL;
if (a == NULL) {
a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...);
}
return a;
}
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static BuddyAllocator* as = NULL;
if (as == NULL) {
as = new BuddyAllocator*[platform::NumGPUs()];
for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) {
as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...);
}
}
return as[gpu_id);
```
#### `BuddyAllocator`
`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes parameters only related with the algorithm:
```cpp
BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) {
...
}
```
Please be aware that **`BuddyAllocator` always allocate aligned memory**, aligned on 32-bytes, which can hold a `BuddyAllocator::Block` object:
```cpp
class BuddyAllocator {
private:
struct Block {
size_t size;
Block* left, right;
size_t index; // allocator id
};
...
};
```
Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace.
#### System Allocators
The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`.
## Justification
I got inspiration from Majel and Caffe2, though above design look different from both.
### Caffe2
In Caffe2, `Tensor<Context>::mutable_data()` allocates the memroy. In particular, [`Tensor<Context>::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor<Context>::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479).
There are two implementations of `Context`:
1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory.
1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory.
### Majel
In Majel, there are basically two allocator types:
1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`.
1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`.
However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces.
In Majel there are hidden global variables like:
1. `cpu::SystemAllocator g_cpu_allocator`, and
1. `vector<gpu::SystemAllocator*> g_gpu_allocators(NUM_GPUS)`.
Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`.
...@@ -38,5 +38,17 @@ void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num, ...@@ -38,5 +38,17 @@ void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
cudaStream_t stream); cudaStream_t stream);
#endif // PADDLE_ONLY_CPU #endif // PADDLE_ONLY_CPU
template <typename T, /* must be POD types */
typename Place /* platform::GPUPlace or platform::CPUPlace */,
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
class PODDeleter {
public:
PODDeleter(Place place) : place_(place) {}
void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
private:
Place place_;
};
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -48,6 +48,7 @@ op_library(mul_op SRCS mul_op.cc mul_op.cu) ...@@ -48,6 +48,7 @@ op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
softmax_op net) softmax_op net)
......
...@@ -31,7 +31,7 @@ protected: ...@@ -31,7 +31,7 @@ protected:
"Inputs/Outputs of AddOp must all be set"); "Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of Add Op's dimension must be same."); "Two input of Add Op's dimension must be same.");
outputs[0]->set_dims(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
...@@ -53,6 +53,5 @@ The equation is: Out = X + Y ...@@ -53,6 +53,5 @@ The equation is: Out = X + Y
} // namespace paddle } // namespace paddle
REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float> REGISTER_OP_CPU_KERNEL(
AddKernel_CPU_float; add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
REGISTER_OP_GPU_KERNEL(add_two, REGISTER_OP_GPU_KERNEL(add_two,
AddKernel_GPU_float); paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file \ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace operators {
class OnehotCrossEntropyOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2,
"Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(outputs.size() == 1,
"Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE(inputs[0] != nullptr && inputs[1] != nullptr,
"Inputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(outputs[0] != nullptr,
"Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2.");
PADDLE_ENFORCE(outputs[0]->dims().size() == 1,
"label's dimension must be 1.");
outputs[0]->Resize(framework::make_ddim({inputs[0]->dims()[0]}));
}
};
class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
public:
OnehotCrossEntropyOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of OnehotCrossEntropyOp");
AddInput("label", "The second input of OnehotCrossEntropyOp");
AddOutput("Y", "The output of OnehotCrossEntropyOp");
AddComment(R"DOC(
OnehotCrossEntropy Operator.
Y[i] = -log(X[i][j])
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(onehot_cross_entropy,
paddle::operators::OnehotCrossEntropyOp,
paddle::operators::OnehotCrossEntropyOpMaker);
REGISTER_OP_CPU_KERNEL(
onehot_cross_entropy,
paddle::operators::OnehotCrossEntropyOpKernel<::paddle::platform::CPUPlace,
float>);
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
paddle::operators::OnehotCrossEntropyOpKernel<
::paddle::platform::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class OnehotCrossEntropyOpKernel : public framework::OpKernel {
public:
constexpr T LOG_THRESHOLD() const { return static_cast<T>(1e-20); }
void Compute(const framework::KernelContext& context) const override {
auto X = context.Input(0)->Get<framework::Tensor>();
const T* X_data = X.data<T>();
const int* label_data =
context.Input(1)->Get<framework::Tensor>().data<int>();
auto* Y = context.Output(0)->GetMutable<framework::Tensor>();
Y->mutable_data<T>(context.GetPlace());
T* Y_data = Y->data<T>();
int batch_size = X.dims()[0];
int class_num = X.dims()[1];
// Y[i] = -log(X[i][j])
for (int i = 0; i < batch_size; ++i) {
Y_data[i] = -std::log(
std::max(X_data[i * class_num + label_data[i]], LOG_THRESHOLD()));
}
}
};
} // namespace operators
} // namespace paddle
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/framework/op_registry.h> #include "paddle/operators/mul_op.h"
#include <paddle/framework/tensor.h> #include "paddle/framework/op_registry.h"
#include <paddle/operators/mul_op.h> #include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -33,7 +33,7 @@ protected: ...@@ -33,7 +33,7 @@ protected:
dim0[1] == dim1[0], dim0[1] == dim1[0],
"First matrix's width must be equal with second matrix's height."); "First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output"); PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
outputs[0]->set_dims({dim0[0], dim1[1]}); outputs[0]->Resize({dim0[0], dim1[1]});
} }
}; };
...@@ -57,4 +57,4 @@ The equation is: Out = X * Y ...@@ -57,4 +57,4 @@ The equation is: Out = X * Y
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker); REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>); mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/operators/mul_op.h> #include "paddle/operators/mul_op.h"
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL(mul, REGISTER_OP_GPU_KERNEL(mul,
paddle::operators::MulKernel<paddle::platform paddle::operators::MulKernel<paddle::platform
::GPUPlace>); ::GPUPlace, float>);
\ No newline at end of file \ No newline at end of file
...@@ -14,17 +14,30 @@ ...@@ -14,17 +14,30 @@
#pragma once #pragma once
#include <glog/logging.h> #include "glog/logging.h"
#include <paddle/framework/operator.h> #include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place> template <typename Place, typename T>
class MulKernel : public framework::OpKernel { class MulKernel : public framework::OpKernel {
public: public:
void Compute(const framework::KernelContext &context) const override { void Compute(const framework::KernelContext& context) const override {
LOG(INFO) << "Mul kernel in " << typeid(Place).name(); Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
{Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}};
auto input0 = context.Input(0)->Get<framework::Tensor>();
auto input1 = context.Input(1)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
framework::EigenMatrix<T>::From(*output).device(
*(context.GetEigenDevice<Place>())) =
framework::EigenMatrix<T>::From(input0).contract(
framework::EigenMatrix<T>::From(input1), dim_pair);
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/framework/op_registry.h> #include "paddle/operators/rowwise_add_op.h"
#include <paddle/operators/rowwise_add_op.h> #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -30,7 +30,7 @@ protected: ...@@ -30,7 +30,7 @@ protected:
PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1"); PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
outputs[0]->set_dims(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
...@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add, ...@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
paddle::operators::RowWiseAddOpMaker); paddle::operators::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
rowwise_add, rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>); paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
#include <paddle/operators/rowwise_add_op.h> #include "paddle/operators/rowwise_add_op.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
rowwise_add, rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>); paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>);
...@@ -13,17 +13,32 @@ ...@@ -13,17 +13,32 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <glog/logging.h> #include "glog/logging.h"
#include <paddle/framework/operator.h> #include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place> template <typename Place, typename T>
class RowWiseAddKernel : public framework::OpKernel { class RowWiseAddKernel : public framework::OpKernel {
public: public:
void Compute(const framework::KernelContext &context) const override { void Compute(const framework::KernelContext& context) const override {
LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name(); auto in0 = context.Input(0)->Get<framework::Tensor>();
auto in1 = context.Input(1)->Get<framework::Tensor>();
auto* out = context.Output(0)->GetMutable<framework::Tensor>();
out->mutable_data<T>(context.GetPlace());
auto input = framework::EigenMatrix<T>::From(in0);
auto bias = framework::EigenVector<T>::From(in1);
auto output = framework::EigenMatrix<T>::From(*out);
const int bias_size = bias.dimension(0);
const int rest_size = input.size() / bias_size;
Eigen::DSizes<int, 1> one_d(input.size());
Eigen::DSizes<int, 1> bcast(rest_size);
output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
} }
}; };
......
...@@ -31,7 +31,7 @@ protected: ...@@ -31,7 +31,7 @@ protected:
PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] mast be set"); PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] mast be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of SGD Op's dimension must be same."); "Two input of SGD Op's dimension must be same.");
outputs[0]->set_dims(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/framework/op_registry.h> #include "paddle/operators/sigmoid_op.h"
#include <paddle/operators/sigmoid_op.h> #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -24,7 +24,7 @@ protected: ...@@ -24,7 +24,7 @@ protected:
const std::vector<framework::Tensor *> &outputs) const override { const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output"); PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output");
outputs[0]->set_dims(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
...@@ -34,7 +34,7 @@ public: ...@@ -34,7 +34,7 @@ public:
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input"); AddInput("X", "sigmoid input");
AddInput("Y", "sigmoid output"); AddOutput("Y", "sigmoid output");
AddComment("Sigmoid function"); AddComment("Sigmoid function");
} }
}; };
...@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid, ...@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid,
paddle::operators::SigmoidOp, paddle::operators::SigmoidOp,
paddle::operators::SigmoidOpMaker); paddle::operators::SigmoidOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>); sigmoid,
paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
#include <paddle/operators/sigmoid_op.h> #include "paddle/operators/sigmoid_op.h"
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>); sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
...@@ -14,17 +14,25 @@ ...@@ -14,17 +14,25 @@
#pragma once #pragma once
#include <glog/logging.h> #include "glog/logging.h"
#include <paddle/framework/operator.h> #include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place> template <typename Place, typename T>
class SigmoidKernel : public framework::OpKernel { class SigmoidKernel : public framework::OpKernel {
public: public:
void Compute(const framework::KernelContext &context) const override { void Compute(const framework::KernelContext& context) const override {
LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name(); auto input = context.Input(0)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).device(
*(context.GetEigenDevice<Place>())) =
1.0 / (1.0 + (-1.0 * framework::EigenVector<T>::Flatten(input)).exp());
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/framework/op_registry.h> #include "paddle/operators/softmax_op.h"
#include <paddle/operators/softmax_op.h> #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -23,9 +23,11 @@ protected: ...@@ -23,9 +23,11 @@ protected:
const std::vector<const framework::Tensor *> &inputs, const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override { const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax"); PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
"The input of softmax op must be matrix");
PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax"); PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
outputs[0]->set_dims(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
...@@ -46,4 +48,5 @@ public: ...@@ -46,4 +48,5 @@ public:
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>); REGISTER_OP_CPU_KERNEL(softmax,
ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
#include <paddle/operators/softmax_op.h> #include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>); softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
...@@ -14,17 +14,49 @@ ...@@ -14,17 +14,49 @@
#pragma once #pragma once
#include <glog/logging.h> #include "glog/logging.h"
#include <paddle/framework/operator.h> #include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place> template <typename Place, typename T>
class SoftmaxKernel : public framework::OpKernel { class SoftmaxKernel : public framework::OpKernel {
public: public:
void Compute(const framework::KernelContext &context) const override { void Compute(const framework::KernelContext& context) const override {
LOG(INFO) << "Softmax kernel in " << typeid(Place).name(); auto input = context.Input(0)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
auto logits = framework::EigenMatrix<T>::From(input);
auto softmax = framework::EigenMatrix<T>::From(*output);
const int kBatchDim = 0;
const int kClassDim = 1;
const int batch_size = logits.dimension(kBatchDim);
const int num_classes = logits.dimension(kClassDim);
Eigen::DSizes<int, 1> along_class(kClassDim);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, num_classes);
auto shifted_logits = (logits -
logits.maximum(along_class)
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
softmax.device(*(context.GetEigenDevice<Place>())) =
(softmax *
softmax.sum(along_class)
.inverse()
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
} }
}; };
} // namespace operators } // namespace operators
......
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op fc_op sgd_op) add_op fc_op sgd_op cross_entropy_op)
...@@ -27,8 +27,13 @@ namespace py = pybind11; ...@@ -27,8 +27,13 @@ namespace py = pybind11;
namespace pd = paddle::framework; namespace pd = paddle::framework;
USE_OP(add_two); USE_OP(add_two);
USE_OP(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc); USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd); USE_OP(sgd);
USE_OP(mul);
USE_OP(sigmoid);
USE_OP(softmax);
USE_OP(rowwise_add);
PYBIND11_PLUGIN(core) { PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of Paddle Paddle"); py::module m("core", "C++ core of Paddle Paddle");
...@@ -41,7 +46,7 @@ PYBIND11_PLUGIN(core) { ...@@ -41,7 +46,7 @@ PYBIND11_PLUGIN(core) {
[](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) [](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
.def("set_dims", .def("set_dims",
[](pd::Tensor& self, const std::vector<int>& dim) { [](pd::Tensor& self, const std::vector<int>& dim) {
self.set_dims(pd::make_ddim(dim)); self.Resize(pd::make_ddim(dim));
}) })
.def("alloc_float", .def("alloc_float",
[](pd::Tensor& self) { [](pd::Tensor& self) {
......
...@@ -86,7 +86,7 @@ void PyTensorSetFromArray( ...@@ -86,7 +86,7 @@ void PyTensorSetFromArray(
dims.push_back((int)array.shape()[i]); dims.push_back((int)array.shape()[i]);
} }
self.set_dims(framework::make_ddim(dims)); self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace()); auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace());
std::memcpy(dst, array.data(), sizeof(T) * array.size()); std::memcpy(dst, array.data(), sizeof(T) * array.size());
} }
......
...@@ -3219,6 +3219,10 @@ def ParameterHook(type, **kwargs): ...@@ -3219,6 +3219,10 @@ def ParameterHook(type, **kwargs):
if sparsity_ratio is not None: if sparsity_ratio is not None:
hook.sparsity_ratio = sparsity_ratio hook.sparsity_ratio = sparsity_ratio
return hook return hook
elif type == 'dpruning':
hook = ParameterUpdaterHookConfig()
hook.type = type
return hook
else: else:
return None return None
......
...@@ -3173,11 +3173,11 @@ def memory(name, ...@@ -3173,11 +3173,11 @@ def memory(name,
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default( @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
param_names=['gate_act', 'state_act'], act=SigmoidActivation()) @wrap_act_default(param_names=['state_act'], act=TanhActivation())
@wrap_act_default(act=TanhActivation()) @wrap_act_default(act=TanhActivation())
@wrap_name_default('lstm_step') @wrap_name_default('lstm_step')
@layer_support() @layer_support(ERROR_CLIPPING, DROPOUT)
def lstm_step_layer(input, def lstm_step_layer(input,
state, state,
size=None, size=None,
...@@ -3531,12 +3531,7 @@ def SubsequenceInput(input): ...@@ -3531,12 +3531,7 @@ def SubsequenceInput(input):
@wrap_name_default("recurrent_group") @wrap_name_default("recurrent_group")
def recurrent_group(step, def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
input,
reverse=False,
name=None,
targetInlink=None,
is_generating=False):
""" """
Recurrent layer group is an extremely flexible recurrent unit in Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a PaddlePaddle. As long as the user defines the calculation done within a
...@@ -3602,21 +3597,12 @@ def recurrent_group(step, ...@@ -3602,21 +3597,12 @@ def recurrent_group(step,
:type targetInlink: LayerOutput|SubsequenceInput :type targetInlink: LayerOutput|SubsequenceInput
:param is_generating: If is generating, none of input type should be LayerOutput;
else, for training or testing, one of the input type must
be LayerOutput.
:type is_generating: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
model_type('recurrent_nn') model_type('recurrent_nn')
def is_single_input(x): if isinstance(input, LayerOutput) or isinstance(input, StaticInput):
return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
if is_single_input(input):
input = [input] input = [input]
assert isinstance(input, collections.Sequence) assert isinstance(input, collections.Sequence)
...@@ -3630,13 +3616,8 @@ def recurrent_group(step, ...@@ -3630,13 +3616,8 @@ def recurrent_group(step,
in_links=map(lambda x: x.name, in_links), in_links=map(lambda x: x.name, in_links),
seq_reversed=reverse) seq_reversed=reverse)
in_args = [] in_args = []
has_LayerOutput = False
for each_input in input: for each_input in input:
assert is_single_input(each_input) if isinstance(each_input, StaticInput): # StaticInput
if isinstance(each_input, LayerOutput):
in_args.append(each_input)
has_LayerOutput = True
else: # StaticInput
mem_name = "__%s_memory__" % each_input.input.name mem_name = "__%s_memory__" % each_input.input.name
mem = memory( mem = memory(
name=None, name=None,
...@@ -3644,24 +3625,26 @@ def recurrent_group(step, ...@@ -3644,24 +3625,26 @@ def recurrent_group(step,
boot_layer=each_input.input) boot_layer=each_input.input)
mem.set_input(mem) mem.set_input(mem)
in_args.append(mem) in_args.append(mem)
else:
assert (is_generating != has_LayerOutput) in_args.append(each_input)
layer_outs = step(*in_args) layer_outs = step(*in_args)
if isinstance(layer_outs, LayerOutput): if isinstance(layer_outs, LayerOutput):
layer_outs = [layer_outs] layer_outs = [layer_outs]
for ot in layer_outs: for layer_out in layer_outs:
assert isinstance(ot, LayerOutput) assert isinstance(
ot.reverse = reverse layer_out, LayerOutput
RecurrentLayerGroupSetOutLink(ot.name) ), "Type of step function's return value must be LayerOutput."
layer_out.reverse = reverse
RecurrentLayerGroupSetOutLink(layer_out.name)
RecurrentLayerGroupEnd(name=name) RecurrentLayerGroupEnd(name=name)
for layer_out in layer_outs: for layer_out in layer_outs:
# Thee previous full_name is the name is the rnn group # The previous full_name is the name inside the recurrent group.
# We need a full_name outside the rnn group # We need a full_name outside the recurrent group.
layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name) layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
if len(layer_outs) == 1: if len(layer_outs) == 1:
...@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object): ...@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object):
class GeneratedInput(BaseGeneratedInput): class GeneratedInput(BaseGeneratedInput):
def after_real_step(self, input): def after_real_step(self, input):
return maxid_layer(input=input, name='__beam_search_predict__') if isinstance(input, LayerOutput):
input = [input]
elif isinstance(input, collections.Sequence):
input = list(input)
if len(input) > 1:
logger.info(
("More than one layers inside the recurrent_group "
"are returned as outputs of the entire recurrent_group "
"PLEASE garantee the first output is probability of "
"the predicted next word."))
return [maxid_layer(
input=input[0], name='__beam_search_predict__')] + (
input[1:] if len(input) > 1 else [])
def before_real_step(self): def before_real_step(self):
predict_id = memory( predict_id = memory(
...@@ -3871,6 +3867,7 @@ def beam_search(step, ...@@ -3871,6 +3867,7 @@ def beam_search(step,
:type step: callable :type step: callable
:param input: Input data for the recurrent unit, which should include the :param input: Input data for the recurrent unit, which should include the
previously generated words as a GeneratedInput object. previously generated words as a GeneratedInput object.
In beam_search, none of the input's type should be LayerOutput.
:type input: list :type input: list
:param bos_id: Index of the start symbol in the dictionary. The start symbol :param bos_id: Index of the start symbol in the dictionary. The start symbol
is a special token for NLP task, which indicates the is a special token for NLP task, which indicates the
...@@ -3912,15 +3909,18 @@ def beam_search(step, ...@@ -3912,15 +3909,18 @@ def beam_search(step,
real_input = [] real_input = []
for i, each_input in enumerate(input): for i, each_input in enumerate(input):
assert isinstance(each_input, StaticInput) or isinstance( assert not isinstance(each_input, LayerOutput), (
each_input, BaseGeneratedInput) "in beam_search, "
"none of the input should has a type of LayerOutput.")
if isinstance(each_input, BaseGeneratedInput): if isinstance(each_input, BaseGeneratedInput):
assert generated_input_index == -1 assert generated_input_index == -1, ("recurrent_group accepts "
"only one GeneratedInput.")
generated_input_index = i generated_input_index = i
else: else:
real_input.append(each_input) real_input.append(each_input)
assert generated_input_index != -1 assert generated_input_index != -1, "No GeneratedInput is given."
gipt = input[generated_input_index] gipt = input[generated_input_index]
...@@ -3941,17 +3941,11 @@ def beam_search(step, ...@@ -3941,17 +3941,11 @@ def beam_search(step,
predict = gipt.after_real_step(step(*args)) predict = gipt.after_real_step(step(*args))
eos_layer(input=predict, eos_id=eos_id, name=eos_name) eos_layer(input=predict[0], eos_id=eos_id, name=eos_name)
return predict return predict
tmp = recurrent_group( return recurrent_group(
step=__real_step__, step=__real_step__, input=real_input, reverse=False, name=name)
input=real_input,
reverse=False,
name=name,
is_generating=True)
return tmp
def __cost_input__(input, label, weight=None): def __cost_input__(input, label, weight=None):
......
...@@ -614,18 +614,17 @@ def simple_lstm(input, ...@@ -614,18 +614,17 @@ def simple_lstm(input,
@wrap_name_default('lstm_unit') @wrap_name_default('lstm_unit')
def lstmemory_unit(input, def lstmemory_unit(input,
memory_boot=None, out_memory=None,
name=None, name=None,
size=None, size=None,
param_attr=None, param_attr=None,
act=None, act=None,
gate_act=None, gate_act=None,
state_act=None, state_act=None,
mixed_bias_attr=None, input_proj_bias_attr=None,
input_proj_layer_attr=None,
lstm_bias_attr=None, lstm_bias_attr=None,
mixed_layer_attr=None, lstm_layer_attr=None):
lstm_layer_attr=None,
get_output_layer_attr=None):
""" """
Define calculations that a LSTM unit performs during a single time step. Define calculations that a LSTM unit performs during a single time step.
This function itself is not a recurrent layer, so it can not be This function itself is not a recurrent layer, so it can not be
...@@ -662,8 +661,8 @@ def lstmemory_unit(input, ...@@ -662,8 +661,8 @@ def lstmemory_unit(input,
:param input: input layer name. :param input: input layer name.
:type input: LayerOutput :type input: LayerOutput
:param memory_boot: the initialization state of the LSTM cell. :param out_memory: output of previous time step
:type memory_boot: LayerOutput | None :type out_memory: LayerOutput | None
:param name: lstmemory unit name. :param name: lstmemory unit name.
:type name: basestring :type name: basestring
:param size: lstmemory unit size. :param size: lstmemory unit size.
...@@ -676,33 +675,35 @@ def lstmemory_unit(input, ...@@ -676,33 +675,35 @@ def lstmemory_unit(input,
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: lstm state activiation type. :param state_act: lstm state activiation type.
:type state_act: BaseActivation :type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer. :param input_proj_bias_attr: bias attribute for input-to-hidden projection.
False means no bias, None means default bias. False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False :type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias. False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False :type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute. :param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute :type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name. :return: lstmemory unit name.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if size is None: if size is None:
assert input.size % 4 == 0 assert input.size % 4 == 0
size = input.size / 4 size = input.size / 4
out_mem = memory(name=name, size=size) if out_memory is None:
state_mem = memory( out_mem = memory(name=name, size=size)
name="%s_state" % name, size=size, boot_layer=memory_boot) else:
out_mem = out_memory
state_mem = memory(name="%s_state" % name, size=size)
with mixed_layer( with mixed_layer(
name="%s_input_recurrent" % name, name="%s_input_recurrent" % name,
size=size * 4, size=size * 4,
bias_attr=mixed_bias_attr, bias_attr=input_proj_bias_attr,
layer_attr=mixed_layer_attr, layer_attr=input_proj_layer_attr,
act=IdentityActivation()) as m: act=IdentityActivation()) as m:
m += identity_projection(input=input) m += identity_projection(input=input)
m += full_matrix_projection(input=out_mem, param_attr=param_attr) m += full_matrix_projection(input=out_mem, param_attr=param_attr)
...@@ -717,11 +718,7 @@ def lstmemory_unit(input, ...@@ -717,11 +718,7 @@ def lstmemory_unit(input,
gate_act=gate_act, gate_act=gate_act,
state_act=state_act, state_act=state_act,
layer_attr=lstm_layer_attr) layer_attr=lstm_layer_attr)
get_output_layer( get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
name='%s_state' % name,
input=lstm_out,
arg_name='state',
layer_attr=get_output_layer_attr)
return lstm_out return lstm_out
...@@ -730,17 +727,16 @@ def lstmemory_unit(input, ...@@ -730,17 +727,16 @@ def lstmemory_unit(input,
def lstmemory_group(input, def lstmemory_group(input,
size=None, size=None,
name=None, name=None,
memory_boot=None, out_memory=None,
reverse=False, reverse=False,
param_attr=None, param_attr=None,
act=None, act=None,
gate_act=None, gate_act=None,
state_act=None, state_act=None,
mixed_bias_attr=None, input_proj_bias_attr=None,
input_proj_layer_attr=None,
lstm_bias_attr=None, lstm_bias_attr=None,
mixed_layer_attr=None, lstm_layer_attr=None):
lstm_layer_attr=None,
get_output_layer_attr=None):
""" """
lstm_group is a recurrent_group version of Long Short Term Memory. It lstm_group is a recurrent_group version of Long Short Term Memory. It
does exactly the same calculation as the lstmemory layer (see lstmemory in does exactly the same calculation as the lstmemory layer (see lstmemory in
...@@ -774,8 +770,8 @@ def lstmemory_group(input, ...@@ -774,8 +770,8 @@ def lstmemory_group(input,
:type size: int :type size: int
:param name: name of the lstmemory group. :param name: name of the lstmemory group.
:type name: basestring :type name: basestring
:param memory_boot: the initialization state of LSTM cell. :param out_memory: output of previous time step
:type memory_boot: LayerOutput | None :type out_memory: LayerOutput | None
:param reverse: is lstm reversed :param reverse: is lstm reversed
:type reverse: bool :type reverse: bool
:param param_attr: Parameter config, None if use default. :param param_attr: Parameter config, None if use default.
...@@ -786,18 +782,17 @@ def lstmemory_group(input, ...@@ -786,18 +782,17 @@ def lstmemory_group(input,
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: lstm state activiation type. :param state_act: lstm state activiation type.
:type state_act: BaseActivation :type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias. False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False :type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute. :param input_proj_bias_attr: bias attribute for input-to-hidden projection.
:type mixed_layer_attr: ExtraLayerAttribute False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute. :param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute :type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: the lstmemory group. :return: the lstmemory group.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -805,18 +800,17 @@ def lstmemory_group(input, ...@@ -805,18 +800,17 @@ def lstmemory_group(input,
def __lstm_step__(ipt): def __lstm_step__(ipt):
return lstmemory_unit( return lstmemory_unit(
input=ipt, input=ipt,
memory_boot=memory_boot,
name=name, name=name,
size=size, size=size,
mixed_bias_attr=mixed_bias_attr,
mixed_layer_attr=mixed_layer_attr,
param_attr=param_attr,
lstm_bias_attr=lstm_bias_attr,
act=act, act=act,
gate_act=gate_act, gate_act=gate_act,
state_act=state_act, state_act=state_act,
out_memory=out_memory,
input_proj_bias_attr=input_proj_bias_attr,
input_proj_layer_attr=input_proj_layer_attr,
param_attr=param_attr,
lstm_layer_attr=lstm_layer_attr, lstm_layer_attr=lstm_layer_attr,
get_output_layer_attr=get_output_layer_attr) lstm_bias_attr=lstm_bias_attr)
return recurrent_group( return recurrent_group(
name='%s_recurrent_group' % name, name='%s_recurrent_group' % name,
......
...@@ -104,7 +104,7 @@ layers { ...@@ -104,7 +104,7 @@ layers {
} }
bias_parameter_name: "lstm_bias" bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid" active_gate_type: "sigmoid"
active_state_type: "sigmoid" active_state_type: "tanh"
} }
layers { layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
...@@ -183,7 +183,7 @@ layers { ...@@ -183,7 +183,7 @@ layers {
} }
bias_parameter_name: "lstm_bias" bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid" active_gate_type: "sigmoid"
active_state_type: "sigmoid" active_state_type: "tanh"
} }
layers { layers {
name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
......
...@@ -258,7 +258,7 @@ layers { ...@@ -258,7 +258,7 @@ layers {
} }
bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid" active_gate_type: "sigmoid"
active_state_type: "sigmoid" active_state_type: "tanh"
} }
layers { layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
......
...@@ -20,12 +20,13 @@ lstm1 = lstmemory_group( ...@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
input=m1, input=m1,
param_attr=lstm_param, param_attr=lstm_param,
lstm_bias_attr=lstm_bias, lstm_bias_attr=lstm_bias,
mixed_bias_attr=False) input_proj_bias_attr=False)
lstm2 = lstmemory_group( lstm2 = lstmemory_group(
input=m2, input=m2,
param_attr=lstm_param, param_attr=lstm_param,
lstm_bias_attr=lstm_bias, lstm_bias_attr=lstm_bias,
mixed_bias_attr=False) input_proj_bias_attr=False)
softmax_param = ParamAttr(name='softmax_param') softmax_param = ParamAttr(name='softmax_param')
......
...@@ -116,7 +116,7 @@ def reader_creator(data_file, ...@@ -116,7 +116,7 @@ def reader_creator(data_file,
data = batch['data'] data = batch['data']
labels = batch['label'] labels = batch['label']
for sample, label in itertools.izip(data, batch['label']): for sample, label in itertools.izip(data, batch['label']):
yield sample, int(label) yield sample, int(label) - 1
if use_xmap: if use_xmap:
return xmap_readers(mapper, reader, cpu_count(), buffered_size) return xmap_readers(mapper, reader, cpu_count(), buffered_size)
......
add_python_test(test_framework test_protobuf.py test_scope.py add_python_test(test_framework
test_default_scope_funcs.py test_op_creation_methods.py test_protobuf.py
test_tensor.py test_fc_op.py test_add_two_op.py test_sgd_op.py) test_scope.py
test_default_scope_funcs.py
test_op_creation_methods.py
test_tensor.py
test_fc_op.py
test_add_two_op.py
test_sgd_op.py
test_cross_entropy_op.py
test_mul_op.py
test_sigmoid_op.py
test_softmax_op.py
test_rowwise_add_op.py)
...@@ -5,6 +5,18 @@ import paddle.v2.framework.create_op_creation_methods as creation ...@@ -5,6 +5,18 @@ import paddle.v2.framework.create_op_creation_methods as creation
class OpTestMeta(type): class OpTestMeta(type):
"""
Operator Test ClassMeta.
It injects `test_all` method into user's OperatorTest class, to make Python
unittest module run that method.
The `test_all` read what value is stored in `self`. It use self's values to
create and run a operator, and check whether that op is OK or not.
See `test_add_two_op` for example usage.
"""
def __new__(cls, name, bases, attrs): def __new__(cls, name, bases, attrs):
obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs) obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
...@@ -44,7 +56,10 @@ class OpTestMeta(type): ...@@ -44,7 +56,10 @@ class OpTestMeta(type):
for out_name in func.all_output_args: for out_name in func.all_output_args:
actual = numpy.array(scope.get_var(out_name).get_tensor()) actual = numpy.array(scope.get_var(out_name).get_tensor())
expect = getattr(self, out_name) expect = getattr(self, out_name)
numpy.testing.assert_almost_equal(actual, expect) # TODO(qijun) The default decimal is 7, but numpy.dot and eigen.mul
# has some diff, and could not pass unittest. So I set decimal 3 here.
# And I will check this in future.
numpy.testing.assert_almost_equal(actual, expect, decimal=3)
obj.test_all = test_all obj.test_all = test_all
return obj return obj
import unittest
import numpy
from op_test_util import OpTestMeta
class TestSGD(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "onehot_cross_entropy"
batch_size = 100
class_num = 10
self.X = numpy.random.random((batch_size, class_num)).astype("float32")
self.label = 5 * numpy.ones(batch_size).astype("int32")
Y = []
for i in range(0, batch_size):
Y.append(-numpy.log(self.X[i][self.label[i]]))
self.Y = numpy.array(Y).astype("float32")
if __name__ == "__main__":
unittest.main()
import unittest
from op_test_util import OpTestMeta
import numpy as np
class TestMulOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "mul"
self.X = np.random.random((32, 784)).astype("float32")
self.Y = np.random.random((784, 100)).astype("float32")
self.Out = np.dot(self.X, self.Y)
if __name__ == '__main__':
unittest.main()
import unittest
from op_test_util import OpTestMeta
import numpy as np
class TestRowwiseAddOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "rowwise_add"
self.X = np.random.random((32, 784)).astype("float32")
self.b = np.random.random(784).astype("float32")
self.Out = np.add(self.X, self.b)
if __name__ == '__main__':
unittest.main()
import unittest
from op_test_util import OpTestMeta
import numpy as np
class TestSigmoidOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "sigmoid"
self.X = np.random.random((32, 100)).astype("float32")
self.Y = 1 / (1 + np.exp(-self.X))
if __name__ == '__main__':
unittest.main()
import unittest
from op_test_util import OpTestMeta
import numpy as np
def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way."""
shiftx = x - np.max(x)
exps = np.exp(shiftx)
return exps / np.sum(exps)
class TestSoftmaxOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "softmax"
self.X = np.random.random((32, 100)).astype("float32")
self.Y = np.apply_along_axis(stable_softmax, 1, self.X)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册