Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
61fa5218
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
61fa5218
编写于
11月 12, 2018
作者:
P
peizhilin
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/develop' into windows/build
上级
7840d181
bd294378
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
332 addition
and
111 deletion
+332
-111
CMakeLists.txt
CMakeLists.txt
+4
-0
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+0
-1
cmake/external/ngraph.cmake
cmake/external/ngraph.cmake
+92
-0
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+49
-53
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+21
-0
paddle/fluid/operators/gather.cu.h
paddle/fluid/operators/gather.cu.h
+3
-1
paddle/fluid/operators/gather.h
paddle/fluid/operators/gather.h
+2
-1
paddle/fluid/operators/gather_op.cc
paddle/fluid/operators/gather_op.cc
+4
-2
paddle/fluid/operators/reduce_max_op.cu
paddle/fluid/operators/reduce_max_op.cu
+0
-9
paddle/fluid/operators/reduce_max_op.part.cu
paddle/fluid/operators/reduce_max_op.part.cu
+25
-0
paddle/fluid/operators/reduce_mean_op.cu
paddle/fluid/operators/reduce_mean_op.cu
+0
-10
paddle/fluid/operators/reduce_mean_op.part.cu
paddle/fluid/operators/reduce_mean_op.part.cu
+26
-0
paddle/fluid/operators/reduce_min_op.cu
paddle/fluid/operators/reduce_min_op.cu
+0
-9
paddle/fluid/operators/reduce_min_op.part.cu
paddle/fluid/operators/reduce_min_op.part.cu
+25
-0
paddle/fluid/operators/reduce_prod_op.cu
paddle/fluid/operators/reduce_prod_op.cu
+0
-9
paddle/fluid/operators/reduce_prod_op.part.cu
paddle/fluid/operators/reduce_prod_op.part.cu
+25
-0
paddle/fluid/operators/reduce_sum_op.cu
paddle/fluid/operators/reduce_sum_op.cu
+0
-10
paddle/fluid/operators/reduce_sum_op.part.cu
paddle/fluid/operators/reduce_sum_op.part.cu
+26
-0
paddle/fluid/operators/scatter.cu.h
paddle/fluid/operators/scatter.cu.h
+2
-1
paddle/fluid/operators/scatter.h
paddle/fluid/operators/scatter.h
+2
-1
paddle/fluid/operators/stack_op.cc
paddle/fluid/operators/stack_op.cc
+6
-2
paddle/fluid/operators/stack_op.cu
paddle/fluid/operators/stack_op.cu
+6
-2
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+2
-0
python/setup.py.in
python/setup.py.in
+12
-0
未找到文件。
CMakeLists.txt
浏览文件 @
61fa5218
...
@@ -46,6 +46,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F
...
@@ -46,6 +46,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F
option
(
WITH_AMD_GPU
"Compile PaddlePaddle with AMD GPU"
OFF
)
option
(
WITH_AMD_GPU
"Compile PaddlePaddle with AMD GPU"
OFF
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_NGRAPH
"Compile PaddlePaddle with nGraph support."
OFF
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
@@ -108,6 +109,8 @@ if(ANDROID OR IOS)
...
@@ -108,6 +109,8 @@ if(ANDROID OR IOS)
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS"
FORCE
)
"Disable MKL when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_NGRAPH OFF CACHE STRING
"Disable nGraph when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_GOLANG OFF CACHE STRING
set
(
WITH_GOLANG OFF CACHE STRING
"Disable golang when cross-compiling for Android and iOS"
FORCE
)
"Disable golang when cross-compiling for Android and iOS"
FORCE
)
...
@@ -176,6 +179,7 @@ include(external/protobuf) # download, build, install protobuf
...
@@ -176,6 +179,7 @@ include(external/protobuf) # download, build, install protobuf
include
(
external/python
)
# download, build, install python
include
(
external/python
)
# download, build, install python
include
(
external/openblas
)
# download, build, install openblas
include
(
external/openblas
)
# download, build, install openblas
include
(
external/mkldnn
)
# download, build, install mkldnn
include
(
external/mkldnn
)
# download, build, install mkldnn
include
(
external/ngraph
)
# download, build, install nGraph
include
(
external/swig
)
# download, build, install swig
include
(
external/swig
)
# download, build, install swig
include
(
external/boost
)
# download boost
include
(
external/boost
)
# download boost
include
(
external/any
)
# download libn::any
include
(
external/any
)
# download libn::any
...
...
cmake/external/mkldnn.cmake
浏览文件 @
61fa5218
...
@@ -37,7 +37,6 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
...
@@ -37,7 +37,6 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
INCLUDE_DIRECTORIES
(
${
MKLDNN_INC_DIR
}
)
# For MKLDNN code to include internal headers.
INCLUDE_DIRECTORIES
(
${
MKLDNN_INC_DIR
}
)
# For MKLDNN code to include internal headers.
INCLUDE_DIRECTORIES
(
${
THIRD_PARTY_PATH
}
/install
)
# For Paddle code to include mkldnn.h
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
...
...
cmake/external/ngraph.cmake
0 → 100644
浏览文件 @
61fa5218
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Declare the `ngraph` INTERFACE target up front; it is created before the
# early return() below, so the target name exists even when nGraph is disabled.
add_library(ngraph INTERFACE)

# nGraph is not supported on Windows or macOS here. Only warn and force the
# option off when it was actually requested, so that default builds on those
# platforms stay quiet and the cache is not rewritten needlessly.
if(WITH_NGRAPH AND (WIN32 OR APPLE))
  message(WARNING
    "Windows or Mac is not supported with nGraph in Paddle yet. "
    "Force WITH_NGRAPH=OFF")
  set(WITH_NGRAPH OFF CACHE STRING "Disable nGraph in Windows and MacOS" FORCE)
endif()

# nGraph requires mkl-dnn. Variables are named directly (not expanded with
# ${...}) so that an empty or undefined value cannot produce a malformed
# if() expression.
if(WITH_NGRAPH AND NOT WITH_MKLDNN)
  message(WARNING
    "nGraph needs mkl-dnn to be enabled. "
    "Force WITH_NGRAPH=OFF")
  set(WITH_NGRAPH OFF CACHE STRING "Disable nGraph if mkl-dnn is disabled" FORCE)
endif()

# Nothing else in this file applies when nGraph is disabled.
if(NOT WITH_NGRAPH)
  return()
endif()
include(ExternalProject)

# Identity of the external nGraph build: target name, version and pinned commit.
set(NGRAPH_PROJECT "extern_ngraph")
set(NGRAPH_VERSION "0.9")
set(NGRAPH_GIT_TAG "f9fd9d4cc318dc59dd4b68448e7fbb5f67a28bd0")
set(NGRAPH_GIT_REPO "https://github.com/NervanaSystems/ngraph.git")

# Source, install and include locations, all rooted at THIRD_PARTY_PATH.
set(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
set(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
set(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)

# File names of the shared libraries referenced further below.
set(NGRAPH_SHARED_LIB_NAME libngraph.so.${NGRAPH_VERSION})
set(NGRAPH_CPU_LIB_NAME libcpu_backend.so)
set(NGRAPH_TBB_LIB_NAME libtbb.so.2)
# Download nGraph at the pinned commit and build/install it under
# NGRAPH_INSTALL_DIR. The build is ordered after the mkl-dnn and mklml
# external projects and is pointed at the mkl-dnn headers and libraries.
ExternalProject_Add(
  ${NGRAPH_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  DEPENDS ${MKLDNN_PROJECT} ${MKLML_PROJECT}
  GIT_REPOSITORY ${NGRAPH_GIT_REPO}
  GIT_TAG ${NGRAPH_GIT_TAG}
  PREFIX ${NGRAPH_SOURCES_DIR}
  UPDATE_COMMAND ""
  CMAKE_ARGS
    -DCMAKE_INSTALL_PREFIX=${NGRAPH_INSTALL_DIR}
    -DNGRAPH_UNIT_TEST_ENABLE=FALSE
    -DNGRAPH_TOOLS_ENABLE=FALSE
    -DNGRAPH_INTERPRETER_ENABLE=FALSE
    -DNGRAPH_DEX_ONLY=TRUE
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
    -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
)
# Library directory of the nGraph install tree: plain "lib" by default, or
# whatever GNUInstallDirs reports as CMAKE_INSTALL_LIBDIR on non-Apple UNIX.
set(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/lib)
if(UNIX AND NOT APPLE)
  include(GNUInstallDirs)
  set(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
endif()
message(STATUS "nGraph lib will be installed at: ${NGRAPH_LIB_DIR}")

# Full paths of the individual nGraph shared libraries.
set(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
set(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
set(NGRAPH_TBB_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME})
# Workaround: nGraph expects to find mklml inside the mkl-dnn install
# directory. Between the download and configure steps, symlink the mklml and
# iomp5 libraries into ${MKLDNN_INSTALL_DIR}/lib.
ExternalProject_Add_Step(
  ${NGRAPH_PROJECT}
  PrepareMKL
  DEPENDEES download
  DEPENDERS configure
  COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${MKLML_LIB} ${MKLDNN_INSTALL_DIR}/lib/libmklml_intel.so
  COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${MKLML_IOMP_LIB} ${MKLDNN_INSTALL_DIR}/lib/libiomp5.so
)
# Usage requirements carried by the `ngraph` INTERFACE target: the
# PADDLE_WITH_NGRAPH definition, the nGraph include directory and the nGraph
# shared library.
target_compile_definitions(ngraph INTERFACE PADDLE_WITH_NGRAPH)
target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR})
target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB})

# Make sure the external nGraph project is built before anything that depends
# on `ngraph`, and record the target in the external dependency list.
add_dependencies(ngraph ${NGRAPH_PROJECT})
list(APPEND external_project_dependencies ngraph)
cmake/external/protobuf.cmake
浏览文件 @
61fa5218
...
@@ -30,66 +30,61 @@ UNSET_VAR(PROTOBUF_LITE_LIBRARY)
...
@@ -30,66 +30,61 @@ UNSET_VAR(PROTOBUF_LITE_LIBRARY)
UNSET_VAR
(
PROTOBUF_LIBRARY
)
UNSET_VAR
(
PROTOBUF_LIBRARY
)
UNSET_VAR
(
PROTOBUF_INCLUDE_DIR
)
UNSET_VAR
(
PROTOBUF_INCLUDE_DIR
)
UNSET_VAR
(
Protobuf_PROTOC_EXECUTABLE
)
UNSET_VAR
(
Protobuf_PROTOC_EXECUTABLE
)
function
(
protobuf_generate_python SRCS
)
# shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
if
(
NOT ARGN
)
message
(
SEND_ERROR
"Error: PROTOBUF_GENERATE_PYTHON() called without any proto files"
)
return
()
endif
()
if
(
NOT COMMAND protobuf_generate_python
)
# before cmake 3.4, protobuf_generate_python is not defined.
if
(
PROTOBUF_GENERATE_CPP_APPEND_PATH
)
function
(
protobuf_generate_python SRCS
)
# Create an include path for each file specified
# shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
if
(
NOT ARGN
)
message
(
SEND_ERROR
"Error: PROTOBUF_GENERATE_PYTHON() called without any proto files"
)
return
()
endif
()
if
(
PROTOBUF_GENERATE_CPP_APPEND_PATH
)
# Create an include path for each file specified
foreach
(
FIL
${
ARGN
}
)
get_filename_component
(
ABS_FIL
${
FIL
}
ABSOLUTE
)
get_filename_component
(
ABS_PATH
${
ABS_FIL
}
PATH
)
list
(
FIND _protobuf_include_path
${
ABS_PATH
}
_contains_already
)
if
(
${
_contains_already
}
EQUAL -1
)
list
(
APPEND _protobuf_include_path -I
${
ABS_PATH
}
)
endif
()
endforeach
()
else
()
set
(
_protobuf_include_path -I
${
CMAKE_CURRENT_SOURCE_DIR
}
)
endif
()
if
(
DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS
)
set
(
Protobuf_IMPORT_DIRS
"
${
PROTOBUF_IMPORT_DIRS
}
"
)
endif
()
if
(
DEFINED Protobuf_IMPORT_DIRS
)
foreach
(
DIR
${
Protobuf_IMPORT_DIRS
}
)
get_filename_component
(
ABS_PATH
${
DIR
}
ABSOLUTE
)
list
(
FIND _protobuf_include_path
${
ABS_PATH
}
_contains_already
)
if
(
${
_contains_already
}
EQUAL -1
)
list
(
APPEND _protobuf_include_path -I
${
ABS_PATH
}
)
endif
()
endforeach
()
endif
()
set
(
${
SRCS
}
)
foreach
(
FIL
${
ARGN
}
)
foreach
(
FIL
${
ARGN
}
)
get_filename_component
(
ABS_FIL
${
FIL
}
ABSOLUTE
)
get_filename_component
(
ABS_FIL
${
FIL
}
ABSOLUTE
)
get_filename_component
(
FIL_WE
${
FIL
}
NAME_WE
)
get_filename_component
(
ABS_PATH
${
ABS_FIL
}
PATH
)
if
(
NOT PROTOBUF_GENERATE_CPP_APPEND_PATH
)
list
(
FIND _protobuf_include_path
${
ABS_PATH
}
_contains_already
)
get_filename_component
(
FIL_DIR
${
FIL
}
DIRECTORY
)
if
(
${
_contains_already
}
EQUAL -1
)
if
(
FIL_DIR
)
list
(
APPEND _protobuf_include_path -I
${
ABS_PATH
}
)
set
(
FIL_WE
"
${
FIL_DIR
}
/
${
FIL_WE
}
"
)
endif
()
endif
()
endif
()
endforeach
()
else
()
set
(
_protobuf_include_path -I
${
CMAKE_CURRENT_SOURCE_DIR
}
)
endif
()
if
(
DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS
)
set
(
Protobuf_IMPORT_DIRS
"
${
PROTOBUF_IMPORT_DIRS
}
"
)
endif
()
list
(
APPEND
${
SRCS
}
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
FIL_WE
}
_pb2.py"
)
if
(
DEFINED Protobuf_IMPORT_DIRS
)
add_custom_command
(
foreach
(
DIR
${
Protobuf_IMPORT_DIRS
}
)
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
FIL_WE
}
_pb2.py"
get_filename_component
(
ABS_PATH
${
DIR
}
ABSOLUTE
)
COMMAND
${
Protobuf_PROTOC_EXECUTABLE
}
--python_out
${
CMAKE_CURRENT_BINARY_DIR
}
${
_protobuf_include_path
}
${
ABS_FIL
}
list
(
FIND _protobuf_include_path
${
ABS_PATH
}
_contains_already
)
DEPENDS
${
ABS_FIL
}
${
Protobuf_PROTOC_EXECUTABLE
}
if
(
${
_contains_already
}
EQUAL -1
)
COMMENT
"Running Python protocol buffer compiler on
${
FIL
}
"
list
(
APPEND _protobuf_include_path -I
${
ABS_PATH
}
)
VERBATIM
)
endif
(
)
endforeach
()
endforeach
()
endif
()
set
(
${
SRCS
}
${${
SRCS
}}
PARENT_SCOPE
)
set
(
${
SRCS
}
)
endfunction
()
foreach
(
FIL
${
ARGN
}
)
endif
()
get_filename_component
(
ABS_FIL
${
FIL
}
ABSOLUTE
)
get_filename_component
(
FIL_WE
${
FIL
}
NAME_WE
)
if
(
NOT PROTOBUF_GENERATE_CPP_APPEND_PATH
)
get_filename_component
(
FIL_DIR
${
FIL
}
DIRECTORY
)
if
(
FIL_DIR
)
set
(
FIL_WE
"
${
FIL_DIR
}
/
${
FIL_WE
}
"
)
endif
()
endif
()
list
(
APPEND
${
SRCS
}
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
FIL_WE
}
_pb2.py"
)
add_custom_command
(
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
FIL_WE
}
_pb2.py"
COMMAND
${
PROTOBUF_PROTOC_EXECUTABLE
}
--python_out
${
CMAKE_CURRENT_BINARY_DIR
}
${
_protobuf_include_path
}
${
ABS_FIL
}
DEPENDS
${
ABS_FIL
}
${
PROTOBUF_PROTOC_EXECUTABLE
}
COMMENT
"Running Python protocol buffer compiler on
${
FIL
}
"
VERBATIM
)
endforeach
()
set
(
${
SRCS
}
${${
SRCS
}}
PARENT_SCOPE
)
endfunction
()
# Print and set the protobuf library information,
# Print and set the protobuf library information,
# finish this cmake process and exit from this file.
# finish this cmake process and exit from this file.
...
@@ -126,6 +121,7 @@ macro(PROMPT_PROTOBUF_LIB)
...
@@ -126,6 +121,7 @@ macro(PROMPT_PROTOBUF_LIB)
# FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`.
# FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`.
# make `protobuf_generate_cpp` happy.
# make `protobuf_generate_cpp` happy.
SET
(
Protobuf_PROTOC_EXECUTABLE
${
PROTOBUF_PROTOC_EXECUTABLE
}
)
SET
(
Protobuf_PROTOC_EXECUTABLE
${
PROTOBUF_PROTOC_EXECUTABLE
}
)
FOREACH
(
dep
${
protobuf_DEPS
}
)
FOREACH
(
dep
${
protobuf_DEPS
}
)
ADD_DEPENDENCIES
(
protobuf
${
dep
}
)
ADD_DEPENDENCIES
(
protobuf
${
dep
}
)
ADD_DEPENDENCIES
(
protobuf_lite
${
dep
}
)
ADD_DEPENDENCIES
(
protobuf_lite
${
dep
}
)
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
61fa5218
...
@@ -5,6 +5,8 @@ list(REMOVE_DUPLICATES GENERAL_OPS)
...
@@ -5,6 +5,8 @@ list(REMOVE_DUPLICATES GENERAL_OPS)
set
(
DEPS_OPS
""
)
set
(
DEPS_OPS
""
)
set
(
pybind_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/pybind/pybind.h
)
set
(
pybind_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/pybind/pybind.h
)
file
(
WRITE
${
pybind_file
}
"// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
WRITE
${
pybind_file
}
"// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
set
(
PART_CUDA_KERNEL_FILES
)
function
(
op_library TARGET
)
function
(
op_library TARGET
)
# op_library is a function to create op library. The interface is same as
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# cc_library. But it handle split GPU/CPU code and link some common library
...
@@ -37,6 +39,12 @@ function(op_library TARGET)
...
@@ -37,6 +39,12 @@ function(op_library TARGET)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.cu
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.cu
)
list
(
APPEND cu_srcs
${
TARGET
}
.cu
)
list
(
APPEND cu_srcs
${
TARGET
}
.cu
)
endif
()
endif
()
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.part.cu
)
set
(
PART_CUDA_KERNEL_FILES
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.part.cu
${
PART_CUDA_KERNEL_FILES
}
PARENT_SCOPE
)
list
(
APPEND cu_srcs
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.part.cu
)
endif
()
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.hip.cu
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.hip.cu
)
list
(
APPEND hip_cu_srcs
${
TARGET
}
.hip.cu
)
list
(
APPEND hip_cu_srcs
${
TARGET
}
.hip.cu
)
endif
()
endif
()
...
@@ -330,6 +338,8 @@ foreach(src ${GENERAL_OPS})
...
@@ -330,6 +338,8 @@ foreach(src ${GENERAL_OPS})
endforeach
()
endforeach
()
file
(
APPEND
${
pybind_file
}
"USE_OP(less_than);
\n
USE_OP(logical_and);
\n
USE_NO_KERNEL_OP(read_from_array);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(less_than);
\n
USE_OP(logical_and);
\n
USE_NO_KERNEL_OP(read_from_array);
\n
"
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
add_subdirectory
(
reader
)
add_subdirectory
(
reader
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
...
@@ -356,3 +366,14 @@ if(NOT WIN32)
...
@@ -356,3 +366,14 @@ if(NOT WIN32)
nv_test
(
nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context
)
nv_test
(
nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context
)
endif
()
endif
()
nv_test
(
dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor
)
nv_test
(
dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor
)
# For each collected `<op>.part.cu` file, read it, extract the first argument
# of its REGISTER_OP_CUDA_KERNEL(...) invocation (the op name) and append a
# matching USE_OP_DEVICE_KERNEL(<op>, CUDA) line to the generated pybind file.
if(WITH_GPU)
  foreach(CUDA_KERNEL_FILE ${PART_CUDA_KERNEL_FILES})
    # Quote the content so ';' and brackets in the C++ source are not treated
    # as CMake list separators.
    file(READ "${CUDA_KERNEL_FILE}" TARGET_CONTENT)
    # `[^,]+` already spans any newline or indentation after the "(", and the
    # STRIP below removes it. The former `\\n?` reached the regex engine as
    # `\n`, which matches a literal 'n' rather than a newline and would eat
    # the first letter of an op name starting with 'n'.
    string(REGEX MATCH "REGISTER_OP_CUDA_KERNEL\\(([^,]+),"
                 MATCHED "${TARGET_CONTENT}")
    if(MATCHED)
      string(STRIP "${CMAKE_MATCH_1}" MATCHED)
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${MATCHED}, CUDA);\n")
    endif()
  endforeach()
endif()
paddle/fluid/operators/gather.cu.h
浏览文件 @
61fa5218
...
@@ -50,7 +50,9 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -50,7 +50,9 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src,
const
Tensor
&
index
,
Tensor
*
output
)
{
const
Tensor
&
index
,
Tensor
*
output
)
{
// PADDLE_ENFORCE(platform::is_gpu_place(place));
// PADDLE_ENFORCE(platform::is_gpu_place(place));
// check index of shape 1-D
// check index of shape 1-D
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
||
(
index
.
dims
().
size
()
==
2
&&
index
.
dims
()[
1
]
==
1
));
int
index_size
=
index
.
dims
()[
0
];
int
index_size
=
index
.
dims
()[
0
];
auto
src_dims
=
src
.
dims
();
auto
src_dims
=
src
.
dims
();
...
...
paddle/fluid/operators/gather.h
浏览文件 @
61fa5218
...
@@ -38,7 +38,8 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -38,7 +38,8 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
const
Tensor
&
index
,
Tensor
*
output
)
{
const
Tensor
&
index
,
Tensor
*
output
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
// check index of shape 1-D
// check index of shape 1-D
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
||
(
index
.
dims
().
size
()
==
2
&&
index
.
dims
()[
1
]
==
1
));
int64_t
index_size
=
index
.
dims
()[
0
];
int64_t
index_size
=
index
.
dims
()[
0
];
auto
src_dims
=
src
.
dims
();
auto
src_dims
=
src
.
dims
();
...
...
paddle/fluid/operators/gather_op.cc
浏览文件 @
61fa5218
...
@@ -31,7 +31,8 @@ class GatherOp : public framework::OperatorWithKernel {
...
@@ -31,7 +31,8 @@ class GatherOp : public framework::OperatorWithKernel {
"Output(Out) of GatherOp should not be null."
);
"Output(Out) of GatherOp should not be null."
);
auto
index_dims
=
ctx
->
GetInputDim
(
"Index"
);
auto
index_dims
=
ctx
->
GetInputDim
(
"Index"
);
PADDLE_ENFORCE
(
index_dims
.
size
()
==
1
);
PADDLE_ENFORCE
(
index_dims
.
size
()
==
1
||
(
index_dims
.
size
()
==
2
&&
index_dims
[
1
]
==
1
));
int
batch_size
=
ctx
->
GetInputDim
(
"Index"
)[
0
];
int
batch_size
=
ctx
->
GetInputDim
(
"Index"
)[
0
];
framework
::
DDim
output_dims
(
ctx
->
GetInputDim
(
"X"
));
framework
::
DDim
output_dims
(
ctx
->
GetInputDim
(
"X"
));
output_dims
[
0
]
=
batch_size
;
output_dims
[
0
]
=
batch_size
;
...
@@ -53,6 +54,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
...
@@ -53,6 +54,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
ShareLoD
(
"X"
,
/*-->*/
framework
::
GradVarName
(
"X"
));
}
}
protected:
protected:
...
@@ -75,7 +77,7 @@ Gather Operator.
...
@@ -75,7 +77,7 @@ Gather Operator.
$Out = X[Index]$
$Out = X[Index]$
Out is obtained by gathering entries of the outer-most dimension
Out is obtained by gathering entries of the outer-most dimension
of X indexed by Index and concatenate them together.
of X indexed by Index and concatenate them together.
Example:
Example:
...
...
paddle/fluid/operators/reduce_max_op.cu
浏览文件 @
61fa5218
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_max,
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_max,
int
,
ops
::
MaxFunctor
>
,
int
,
ops
::
MaxFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxFunctor
>
);
int64_t
,
ops
::
MaxFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_max_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_max_op.part.cu
0 → 100644
浏览文件 @
61fa5218
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
// CUDA kernel registration for `reduce_max_grad`: one ReduceGradKernel
// instantiation per element type (float, double, int, int64_t), each using
// MaxOrMinGradFunctor.
REGISTER_OP_CUDA_KERNEL(
    reduce_max_grad,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MaxOrMinGradFunctor>);
paddle/fluid/operators/reduce_mean_op.cu
浏览文件 @
61fa5218
...
@@ -69,13 +69,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel<float>,
...
@@ -69,13 +69,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel<float>,
ops
::
ReduceMeanKernel
<
double
>
,
ops
::
ReduceMeanKernel
<
double
>
,
ops
::
ReduceMeanKernel
<
int
>
,
ops
::
ReduceMeanKernel
<
int
>
,
ops
::
ReduceMeanKernel
<
int64_t
>
);
ops
::
ReduceMeanKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_mean_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MeanGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MeanGradFunctor
>
);
paddle/fluid/operators/reduce_mean_op.part.cu
0 → 100644
浏览文件 @
61fa5218
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// .part used to speed up nvcc compile
#include "paddle/fluid/operators/reduce_mean_op.h"
// CUDA kernel registration for `reduce_mean_grad`: one ReduceGradKernel
// instantiation per element type (float, double, int, int64_t), each using
// MeanGradFunctor.
REGISTER_OP_CUDA_KERNEL(
    reduce_mean_grad,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
                          ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MeanGradFunctor>);
paddle/fluid/operators/reduce_min_op.cu
浏览文件 @
61fa5218
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_min,
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_min,
int
,
ops
::
MinFunctor
>
,
int
,
ops
::
MinFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MinFunctor
>
);
int64_t
,
ops
::
MinFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_min_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
MaxOrMinGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
MaxOrMinGradFunctor
>
);
paddle/fluid/operators/reduce_min_op.part.cu
0 → 100644
浏览文件 @
61fa5218
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_min_max_op.h"
// CUDA kernel registration for `reduce_min_grad`: one ReduceGradKernel
// instantiation per element type (float, double, int, int64_t), each using
// MaxOrMinGradFunctor.
REGISTER_OP_CUDA_KERNEL(
    reduce_min_grad,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MaxOrMinGradFunctor>);
paddle/fluid/operators/reduce_prod_op.cu
浏览文件 @
61fa5218
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_prod,
...
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_prod,
int
,
ops
::
ProdFunctor
>
,
int
,
ops
::
ProdFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
ProdFunctor
>
);
int64_t
,
ops
::
ProdFunctor
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_prod_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
ProdGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
ProdGradFunctor
>
);
paddle/fluid/operators/reduce_prod_op.part.cu
0 → 100644
浏览文件 @
61fa5218
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_prod_op.h"
// CUDA kernel registration for `reduce_prod_grad`: one ReduceGradKernel
// instantiation per element type (float, double, int, int64_t), each using
// ProdGradFunctor.
REGISTER_OP_CUDA_KERNEL(
    reduce_prod_grad,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
                          ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::ProdGradFunctor>);
paddle/fluid/operators/reduce_sum_op.cu
浏览文件 @
61fa5218
...
@@ -64,13 +64,3 @@ class ReduceSumKernel : public framework::OpKernel<T> {
...
@@ -64,13 +64,3 @@ class ReduceSumKernel : public framework::OpKernel<T> {
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceSumKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceSumKernel
<
float
>
,
ops
::
ReduceSumKernel
<
double
>
,
ops
::
ReduceSumKernel
<
int
>
,
ops
::
ReduceSumKernel
<
double
>
,
ops
::
ReduceSumKernel
<
int
>
,
ops
::
ReduceSumKernel
<
int64_t
>
);
ops
::
ReduceSumKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
SumGradFunctor
>
);
paddle/fluid/operators/reduce_sum_op.part.cu
0 → 100644
浏览文件 @
61fa5218
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/cub_reduce.h"
#include "paddle/fluid/operators/reduce_sum_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
ops
::
SumGradFunctor
>
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
,
ops
::
SumGradFunctor
>
);
paddle/fluid/operators/scatter.cu.h
浏览文件 @
61fa5218
...
@@ -51,7 +51,8 @@ void GPUScatterAssign(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -51,7 +51,8 @@ void GPUScatterAssign(const platform::DeviceContext& ctx, const Tensor& src,
const
Tensor
&
index
,
Tensor
*
output
)
{
const
Tensor
&
index
,
Tensor
*
output
)
{
// PADDLE_ENFORCE(platform::is_gpu_place(place));
// PADDLE_ENFORCE(platform::is_gpu_place(place));
// check index of shape 1-D
// check index of shape 1-D
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
||
(
index
.
dims
().
size
()
==
2
&&
index
.
dims
()[
1
]
==
1
));
int
index_size
=
index
.
dims
()[
0
];
int
index_size
=
index
.
dims
()[
0
];
auto
src_dims
=
src
.
dims
();
auto
src_dims
=
src
.
dims
();
...
...
paddle/fluid/operators/scatter.h
浏览文件 @
61fa5218
...
@@ -37,7 +37,8 @@ void ScatterAssign(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -37,7 +37,8 @@ void ScatterAssign(const platform::DeviceContext& ctx, const Tensor& src,
const
Tensor
&
index
,
Tensor
*
output
)
{
const
Tensor
&
index
,
Tensor
*
output
)
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
// check index of shape 1-D
// check index of shape 1-D
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
||
(
index
.
dims
().
size
()
==
2
&&
index
.
dims
()[
1
]
==
1
));
int
index_size
=
index
.
dims
()[
0
];
int
index_size
=
index
.
dims
()[
0
];
auto
src_dims
=
src
.
dims
();
auto
src_dims
=
src
.
dims
();
...
...
paddle/fluid/operators/stack_op.cc
浏览文件 @
61fa5218
...
@@ -21,8 +21,12 @@ REGISTER_OPERATOR(stack, ops::StackOp, ops::StackOpMaker,
...
@@ -21,8 +21,12 @@ REGISTER_OPERATOR(stack, ops::StackOp, ops::StackOpMaker,
REGISTER_OPERATOR
(
stack_grad
,
ops
::
StackOpGrad
);
REGISTER_OPERATOR
(
stack_grad
,
ops
::
StackOpGrad
);
REGISTER_OP_CPU_KERNEL
(
stack
,
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
float
>
,
REGISTER_OP_CPU_KERNEL
(
stack
,
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
float
>
,
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
double
>
);
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
double
>
,
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
int
>
,
ops
::
StackKernel
<
plat
::
CPUDeviceContext
,
int64_t
>
);
REGISTER_OP_CPU_KERNEL
(
stack_grad
,
REGISTER_OP_CPU_KERNEL
(
stack_grad
,
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
float
>
,
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
float
>
,
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
double
>
);
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
double
>
,
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
int
>
,
ops
::
StackGradKernel
<
plat
::
CPUDeviceContext
,
int64_t
>
);
paddle/fluid/operators/stack_op.cu
浏览文件 @
61fa5218
...
@@ -18,8 +18,12 @@ namespace plat = paddle::platform;
...
@@ -18,8 +18,12 @@ namespace plat = paddle::platform;
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
stack
,
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
float
>
,
REGISTER_OP_CUDA_KERNEL
(
stack
,
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
double
>
);
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
double
>
,
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
int
>
,
ops
::
StackKernel
<
plat
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
stack_grad
,
REGISTER_OP_CUDA_KERNEL
(
stack_grad
,
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
double
>
);
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
double
>
,
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
int
>
,
ops
::
StackGradKernel
<
plat
::
CUDADeviceContext
,
int64_t
>
);
paddle/scripts/paddle_build.sh
浏览文件 @
61fa5218
...
@@ -139,6 +139,7 @@ function cmake_gen() {
...
@@ -139,6 +139,7 @@ function cmake_gen() {
-DWITH_AMD_GPU=
${
WITH_AMD_GPU
:-
OFF
}
-DWITH_AMD_GPU=
${
WITH_AMD_GPU
:-
OFF
}
-DWITH_DISTRIBUTE=
${
WITH_DISTRIBUTE
:-
OFF
}
-DWITH_DISTRIBUTE=
${
WITH_DISTRIBUTE
:-
OFF
}
-DWITH_MKL=
${
WITH_MKL
:-
ON
}
-DWITH_MKL=
${
WITH_MKL
:-
ON
}
-DWITH_NGRAPH=
${
WITH_NGRAPH
:-
OFF
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
OFF
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
OFF
}
-DCUDA_ARCH_NAME=
${
CUDA_ARCH_NAME
:-
All
}
-DCUDA_ARCH_NAME=
${
CUDA_ARCH_NAME
:-
All
}
...
@@ -171,6 +172,7 @@ EOF
...
@@ -171,6 +172,7 @@ EOF
-DWITH_AMD_GPU
=
${
WITH_AMD_GPU
:-
OFF
}
\
-DWITH_AMD_GPU
=
${
WITH_AMD_GPU
:-
OFF
}
\
-DWITH_DISTRIBUTE
=
${
WITH_DISTRIBUTE
:-
OFF
}
\
-DWITH_DISTRIBUTE
=
${
WITH_DISTRIBUTE
:-
OFF
}
\
-DWITH_MKL
=
${
WITH_MKL
:-
ON
}
\
-DWITH_MKL
=
${
WITH_MKL
:-
ON
}
\
-DWITH_NGRAPH
=
${
WITH_NGRAPH
:-
OFF
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
OFF
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
OFF
}
\
-DCUDA_ARCH_NAME
=
${
CUDA_ARCH_NAME
:-
All
}
\
-DCUDA_ARCH_NAME
=
${
CUDA_ARCH_NAME
:-
All
}
\
...
...
python/setup.py.in
浏览文件 @
61fa5218
...
@@ -179,6 +179,18 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
...
@@ -179,6 +179,18 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
raise Exception("patch libmkldnn.so failed, command: %s" % command)
raise Exception("patch libmkldnn.so failed, command: %s" % command)
package_data['paddle.libs']+=['libmkldnn.so.0']
package_data['paddle.libs']+=['libmkldnn.so.0']
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
if '${WITH_NGRAPH}' == 'ON':
if '${CMAKE_BUILD_TYPE}' == 'Release':
# only change rpath in Release mode.
command = "patchelf --set-rpath '$ORIGIN/' ${NGRAPH_SHARED_LIB}"
if os.system(command) != 0:
raise Exception("patch ${NGRAPH_SHARED_LIB_NAME} failed, command: %s" % command)
shutil.copy('${NGRAPH_SHARED_LIB}', libs_path)
shutil.copy('${NGRAPH_CPU_LIB}', libs_path)
shutil.copy('${NGRAPH_TBB_LIB}', libs_path)
package_data['paddle.libs']+=['${NGRAPH_SHARED_LIB_NAME}',
'${NGRAPH_CPU_LIB_NAME}',
'${NGRAPH_TBB_LIB_NAME}']
# remove unused paddle/libs/__init__.py
# remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'):
if os.path.isfile(libs_path+'/__init__.py'):
os.remove(libs_path+'/__init__.py')
os.remove(libs_path+'/__init__.py')
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录