Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
37faf495
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
37faf495
编写于
9月 20, 2017
作者:
R
ranqiu
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into r-doc
上级
e635e3fd
d865b047
变更
49
隐藏空白更改
内联
并排
Showing
49 changed file
with
1539 addition
and
357 deletion
+1539
-357
.travis.yml
.travis.yml
+0
-4
CMakeLists.txt
CMakeLists.txt
+16
-14
cmake/cblas.cmake
cmake/cblas.cmake
+7
-0
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+350
-0
cmake/external/gflags.cmake
cmake/external/gflags.cmake
+8
-7
cmake/external/glog.cmake
cmake/external/glog.cmake
+11
-10
cmake/external/gtest.cmake
cmake/external/gtest.cmake
+10
-9
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+23
-10
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+2
-1
cmake/external/python.cmake
cmake/external/python.cmake
+10
-13
cmake/external/swig.cmake
cmake/external/swig.cmake
+4
-0
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+22
-29
cmake/external/zlib.cmake
cmake/external/zlib.cmake
+9
-8
cmake/flags.cmake
cmake/flags.cmake
+4
-2
cmake/system.cmake
cmake/system.cmake
+9
-5
cmake/util.cmake
cmake/util.cmake
+3
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+1
-1
paddle/capi/CMakeLists.txt
paddle/capi/CMakeLists.txt
+29
-33
paddle/function/neon/NeonDepthwiseConv.cpp
paddle/function/neon/NeonDepthwiseConv.cpp
+1
-1
paddle/gserver/activations/ActivationFunction.cpp
paddle/gserver/activations/ActivationFunction.cpp
+10
-1
paddle/gserver/activations/MKLDNNActivation.cpp
paddle/gserver/activations/MKLDNNActivation.cpp
+87
-0
paddle/gserver/activations/MKLDNNActivation.h
paddle/gserver/activations/MKLDNNActivation.h
+182
-0
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+1
-4
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+1
-3
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+4
-0
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+0
-1
paddle/gserver/layers/SequenceSliceLayer.cpp
paddle/gserver/layers/SequenceSliceLayer.cpp
+5
-4
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+30
-10
paddle/gserver/tests/MKLDNNTester.h
paddle/gserver/tests/MKLDNNTester.h
+1
-2
paddle/gserver/tests/test_MKLDNN.cpp
paddle/gserver/tests/test_MKLDNN.cpp
+45
-2
paddle/math/MathFunctions.h
paddle/math/MathFunctions.h
+1
-1
paddle/operators/cross_entropy_op.cc
paddle/operators/cross_entropy_op.cc
+147
-0
paddle/operators/cross_entropy_op.cu
paddle/operators/cross_entropy_op.cu
+158
-0
paddle/operators/cross_entropy_op.h
paddle/operators/cross_entropy_op.h
+117
-0
paddle/operators/onehot_cross_entropy_op.cc
paddle/operators/onehot_cross_entropy_op.cc
+0
-85
paddle/operators/prelu_op.h
paddle/operators/prelu_op.h
+6
-4
paddle/platform/transform.h
paddle/platform/transform.h
+55
-29
paddle/platform/transform_test.cu
paddle/platform/transform_test.cu
+11
-5
paddle/pserver/CMakeLists.txt
paddle/pserver/CMakeLists.txt
+12
-8
paddle/scripts/travis/build_ios.sh
paddle/scripts/travis/build_ios.sh
+20
-0
paddle/scripts/travis/check_style.sh
paddle/scripts/travis/check_style.sh
+6
-0
paddle/trainer/CMakeLists.txt
paddle/trainer/CMakeLists.txt
+16
-14
paddle/utils/Excepts.h
paddle/utils/Excepts.h
+2
-1
paddle/utils/arch/linux/Locks.cpp
paddle/utils/arch/linux/Locks.cpp
+6
-0
paddle/utils/arch/osx/Excepts.cpp
paddle/utils/arch/osx/Excepts.cpp
+2
-1
python/paddle/v2/framework/tests/test_cross_entropy_op.py
python/paddle/v2/framework/tests/test_cross_entropy_op.py
+89
-0
python/paddle/v2/framework/tests/test_mnist.py
python/paddle/v2/framework/tests/test_mnist.py
+4
-2
python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py
...paddle/v2/framework/tests/test_onehot_cross_entropy_op.py
+0
-30
python/paddle/v2/framework/tests/test_prelu_op.py
python/paddle/v2/framework/tests/test_prelu_op.py
+2
-2
未找到文件。
.travis.yml
浏览文件 @
37faf495
...
...
@@ -36,10 +36,6 @@ before_install:
# protobuf version.
-
sudo pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt
-
sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker
-
curl https://glide.sh/get | bash
-
eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
-
go get -u github.com/alecthomas/gometalinter
-
gometalinter --install
-
|
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script
:
...
...
CMakeLists.txt
浏览文件 @
37faf495
...
...
@@ -27,7 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING)
endif
(
NOT CMAKE_CROSSCOMPILING
)
find_package
(
Git REQUIRED
)
find_package
(
Threads REQUIRED
)
if
(
NOT ANDROID
)
if
(
NOT ANDROID
AND NOT IOS
)
find_package
(
Boost QUIET
)
endif
()
...
...
@@ -64,27 +64,29 @@ if(NOT CMAKE_BUILD_TYPE)
FORCE
)
endif
()
if
(
ANDROID
)
if
(
${
CMAKE_SYSTEM_VERSION
}
VERSION_LESS
"16"
)
message
(
FATAL_ERROR
"Unsupport standalone toolchains with Android API level lower than 16"
)
elseif
(
${
CMAKE_SYSTEM_VERSION
}
VERSION_LESS
"21"
)
# TODO: support glog for Android api 16 ~ 19 in the future
message
(
WARNING
"Using the unofficial git repository <https://github.com/Xreki/glog.git> instead"
)
if
(
ANDROID OR IOS
)
if
(
ANDROID
)
if
(
AND
${
CMAKE_SYSTEM_VERSION
}
VERSION_LESS
"16"
)
message
(
FATAL_ERROR
"Unsupport standalone toolchains with Android API level lower than 16"
)
elseif
(
${
CMAKE_SYSTEM_VERSION
}
VERSION_LESS
"21"
)
# TODO: support glog for Android api 16 ~ 19 in the future
message
(
WARNING
"Using the unofficial git repository <https://github.com/Xreki/glog.git> instead"
)
endif
()
endif
()
set
(
WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android"
FORCE
)
"Disable GPU when cross-compiling for Android
and iOS
"
FORCE
)
set
(
WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android"
FORCE
)
"Disable AVX when cross-compiling for Android
and iOS
"
FORCE
)
set
(
WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android"
FORCE
)
"Disable PYTHON when cross-compiling for Android
and iOS
"
FORCE
)
set
(
WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android"
FORCE
)
"Disable RDMA when cross-compiling for Android
and iOS
"
FORCE
)
set
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android"
FORCE
)
"Disable MKLDNN when cross-compiling for Android
and iOS
"
FORCE
)
set
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android"
FORCE
)
endif
(
ANDROID
)
"Disable MKLML package when cross-compiling for Android
and iOS
"
FORCE
)
endif
()
set
(
THIRD_PARTY_PATH
"
${
CMAKE_BINARY_DIR
}
/third_party"
CACHE STRING
"A path setting third party libraries download & build directories."
)
...
...
cmake/cblas.cmake
浏览文件 @
37faf495
...
...
@@ -171,3 +171,10 @@ if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
add_definitions
(
-DPADDLE_USE_REFERENCE_CBLAS
)
message
(
STATUS
"Found reference-cblas (include:
${
CBLAS_INC_DIR
}
, library:
${
CBLAS_LIBRARIES
}
)"
)
endif
()
if
(
IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND
)
set
(
CBLAS_FOUND ON
)
set
(
CBLAS_PROVIDER vecLib
)
set
(
CBLAS_INC_DIR
${
VECLIB_INC_DIR
}
)
add_definitions
(
-DPADDLE_USE_VECLIB
)
endif
()
cmake/cross_compiling/ios.cmake
0 → 100644
浏览文件 @
37faf495
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a toolchain file for cross-compiling for iOS, and the
# configuration largely refers to public toolchain file:
# https://raw.githubusercontent.com/leetal/ios-cmake/master/ios.toolchain.cmake
# and
# https://github.com/cristeab/ios-cmake
#
# Supports options:
# IOS_PLATFORM = OS (default) or SIMULATOR
# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders
# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch.
# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch.
# IOS_ARCH
# The archectures wanted to support, such "arm64", "armv7;arm64"
# IOS_DEPLOYMENT_TARGET
# The minimum iOS deployment version, such as "7.0"
# IOS_ENABLE_BITCODE = ON (default) or OFF
# IOS_USE_VECLIB_FOR_BLAS = OFF (default) or ON
# IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
# By default this location is automatcially chosen based on the IOS_PLATFORM value above.
# If set manually, it will override the default location and force the user of a particular Developer Platform
# IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
# By default this location is automatcially chosen based on the IOS_DEVELOPER_ROOT value.
# In this case it will always be the most up-to-date SDK found in the IOS_DEVELOPER_ROOT path.
# If set manually, this will force the use of a specific SDK version
# Macros:
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE)
# A convenience macro for setting xcode specific properties on targets
# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
# find_host_package (PROGRAM ARGS)
# A macro used to find executable programs on the host system, not within the iOS environment.
# Thanks to the android-cmake project for providing the command
if
(
NOT IOS
)
return
()
endif
()
set
(
CMAKE_SYSTEM_NAME Darwin
)
# Get the Xcode version being used.
execute_process
(
COMMAND xcodebuild -version
OUTPUT_VARIABLE XCODE_VERSION
RESULT_VARIABLE XCODE_VERSION_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
if
(
NOT
${
XCODE_VERSION_RESULT
}
)
string
(
REGEX MATCH
"Xcode [0-9
\\
.]+"
XCODE_VERSION
"
${
XCODE_VERSION
}
"
)
string
(
REGEX REPLACE
"Xcode ([0-9
\\
.]+)"
"
\\
1"
XCODE_VERSION
"
${
XCODE_VERSION
}
"
)
message
(
STATUS
"Building with Xcode version:
${
XCODE_VERSION
}
"
)
else
()
message
(
FATAL_ERROR
"Cannot execute xcodebuild, please check whether xcode is installed."
)
endif
()
# Required as of cmake 2.8.10
set
(
CMAKE_OSX_DEPLOYMENT_TARGET
""
CACHE STRING
"Force unset of the deployment target for iOS"
FORCE
)
# Setup iOS platform unless specified manually with IOS_PLATFORM
if
(
NOT DEFINED IOS_PLATFORM
)
set
(
IOS_PLATFORM
"OS"
)
endif
()
set
(
IOS_PLATFORM
${
IOS_PLATFORM
}
CACHE STRING
"Type of iOS Platform"
)
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
set
(
IOS_ARCH
"i386;x86_64"
)
elseif
(
IOS_PLATFORM STREQUAL
"WATCHOS"
)
set
(
IOS_ARCH armv7k
)
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
# Specify minimum iOS deployment version
if
(
NOT DEFINED IOS_DEPLOYMENT_TARGET
)
set
(
IOS_DEPLOYMENT_TARGET
"7.0"
)
endif
()
set
(
IOS_DEPLOYMENT_TARGET
${
IOS_DEPLOYMENT_TARGET
}
CACHE STRING
"Minimum iOS version"
)
# Whether to enable bitcode
if
(
NOT DEFINED IOS_ENABLE_BITCODE
)
set
(
IOS_ENABLE_BITCODE ON
)
endif
()
set
(
IOS_ENABLE_BITCODE
${
IOS_ENABLE_BITCODE
}
CACHE BOOL
"Whether to enable bitcode"
)
if
(
NOT DEFINED IOS_USE_VECLIB_FOR_BLAS
)
set
(
IOS_USE_VECLIB_FOR_BLAS OFF
)
endif
()
set
(
IOS_USE_VECLIB_FOR_BLAS
${
IOS_UES_VECLIB_FOR_BLAS
}
CACHE BOOL
"Whether to use veclib"
)
# Check the platform selection and setup for developer root
if
(
${
IOS_PLATFORM
}
STREQUAL
"OS"
)
set
(
IOS_PLATFORM_LOCATION
"iPhoneOS.platform"
)
set
(
XCODE_IOS_PLATFORM iphoneos
)
# This causes the installers to properly locate the output libraries
set
(
CMAKE_XCODE_EFFECTIVE_PLATFORMS
"-iphoneos"
)
elseif
(
${
IOS_PLATFORM
}
STREQUAL
"SIMULATOR"
)
set
(
IOS_PLATFORM_LOCATION
"iPhoneSimulator.platform"
)
set
(
XCODE_IOS_PLATFORM iphonesimulator
)
# This causes the installers to properly locate the output libraries
set
(
CMAKE_XCODE_EFFECTIVE_PLATFORMS
"-iphonesimulator"
)
elseif
(
${
IOS_PLATFORM
}
STREQUAL
"WATCHOS"
)
set
(
IOS_PLATFORM_LOCATION
"WatchOS.platform"
)
set
(
XCODE_IOS_PLATFORM watchos
)
# This causes the installers to properly locate the output libraries
set
(
CMAKE_XCODE_EFFECTIVE_PLATFORMS
"-watchos"
)
else
(
${
IOS_PLATFORM
}
STREQUAL
"OS"
)
message
(
FATAL_ERROR
"Unsupported IOS_PLATFORM value selected. Please set to
\n
"
"
\t
OS, SIMULATOR, or WATCHOS."
)
endif
()
# Check iOS developer toolchain
if
(
NOT DEFINED IOS_DEVELOPER_ROOT
)
# Setup iOS developer location
execute_process
(
COMMAND xcode-select -print-path
OUTPUT_VARIABLE XCODE_DEVELOPER_DIR
RESULT_VARIABLE XCODE_DEVELOPER_DIR_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Xcode 4.3 changed the installation location, choose the most recent one available
if
(
${
XCODE_VERSION
}
VERSION_LESS
"4.3.0"
)
set
(
IOS_DEVELOPER_ROOT
"/Developer/Platforms/
${
IOS_PLATFORM_LOCATION
}
/Developer"
)
else
()
set
(
IOS_DEVELOPER_ROOT
"
${
XCODE_DEVELOPER_DIR
}
/Platforms/
${
IOS_PLATFORM_LOCATION
}
/Developer"
)
endif
()
endif
()
if
(
EXISTS
${
IOS_DEVELOPER_ROOT
}
)
set
(
IOS_DEVELOPER_ROOT
${
IOS_DEVELOPER_ROOT
}
CACHE PATH
"Location of iOS Platform"
)
else
()
message
(
FATAL_ERROR
"Invalid IOS_DEVELOPER_ROOT:
${
IOS_DEVELOPER_ROOT
}
does not exist."
)
endif
()
# Check iOS SDK
if
(
NOT DEFINED IOS_SDK_ROOT
)
# Find and use the most recent iOS sdk
file
(
GLOB IOS_SDK_LISTS
"
${
IOS_DEVELOPER_ROOT
}
/SDKs/*"
)
if
(
IOS_SDK_LISTS
)
list
(
SORT IOS_SDK_LISTS
)
list
(
REVERSE IOS_SDK_LISTS
)
list
(
GET IOS_SDK_LISTS 0 IOS_SDK_ROOT
)
else
(
IOS_SDK_LISTS
)
message
(
FATAL_ERROR
"No iOS SDK's found in default search path
${
IOS_DEVELOPER_ROOT
}
."
" Please manually set IOS_SDK_ROOT or install the iOS SDK."
)
endif
(
IOS_SDK_LISTS
)
endif
()
if
(
EXISTS
${
IOS_SDK_ROOT
}
)
set
(
IOS_SDK_ROOT
${
IOS_SDK_ROOT
}
CACHE PATH
"Location of the selected iOS SDK"
)
message
(
STATUS
"iOS toolchain:
${
IOS_SDK_ROOT
}
"
)
else
()
message
(
FATAL_ERROR
"Invalid IOS_SDK_ROOT:
${
IOS_SDK_ROOT
}
does not exist."
)
endif
()
# Set the sysroot default to the most recent SDK
set
(
CMAKE_OSX_SYSROOT
${
IOS_SDK_ROOT
}
CACHE PATH
"Sysroot used for iOS support"
)
# Get version of iOS SDK
execute_process
(
COMMAND xcodebuild -sdk
${
CMAKE_OSX_SYSROOT
}
-version SDKVersion
OUTPUT_VARIABLE IOS_SDK_VERSION
RESULT_VARIABLE IOS_SDK_VERSION_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
if
(
${
IOS_SDK_VERSION_RESULT
}
)
string
(
REGEX MATCH
"(([0-9]+)
\\
.)+([0-9]+)"
IOS_SDK_VERSION
"
${
IOS_SDK_ROOT
}
"
)
endif
()
if
(
NOT IOS_SDK_VERSION
)
message
(
WARNING
"Cannot get SDK's version."
)
set
(
IOS_SDK_VERSION 1
)
endif
()
set
(
CMAKE_SYSTEM_VERSION
${
IOS_SDK_VERSION
}
)
# Find the C & C++ compilers for the specified SDK.
if
(
NOT CMAKE_C_COMPILER
)
# Default to use clang
execute_process
(
COMMAND xcrun -sdk
${
CMAKE_OSX_SYSROOT
}
-find clang
OUTPUT_VARIABLE IOS_C_COMPILER
RESULT_VARIABLE IOS_C_COMPILER_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
if
(
${
IOS_C_COMPILER_RESULT
}
)
get_filename_component
(
IOS_C_COMPILER clang PROGRAM
)
endif
()
else
(
NOT CMAKE_C_COMPILER
)
# User can set it in cmake command
get_filename_component
(
IOS_C_COMPILER
${
CMAKE_C_COMPILER
}
PROGRAM
)
endif
(
NOT CMAKE_C_COMPILER
)
if
(
NOT EXISTS
${
IOS_C_COMPILER
}
)
message
(
FATAL_ERROR
"Cannot find C compiler:
${
IOS_C_COMPILER
}
"
)
endif
()
if
(
NOT CMAKE_CXX_COMPILER
)
# Default to use clang++
execute_process
(
COMMAND xcrun -sdk
${
CMAKE_OSX_SYSROOT
}
-find clang++
OUTPUT_VARIABLE IOS_CXX_COMPILER
RESULT_VARIABLE IOS_CXX_COMPILER_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
if
(
${
IOS_CXX_COMPILER_RESULT
}
)
get_filename_component
(
IOS_CXX_COMPILER clang++ PROGRAM
)
endif
()
else
(
NOT CMAKE_CXX_COMPILER
)
# User can set it in cmake command
get_filename_component
(
IOS_CXX_COMPILER
${
CMAKE_CXX_COMPILER
}
PROGRAM
)
endif
(
NOT CMAKE_CXX_COMPILER
)
if
(
NOT EXISTS
${
IOS_CXX_COMPILER
}
)
message
(
FATAL_ERROR
"Cannot find CXX compiler:
${
IOS_CXX_COMPILER
}
"
)
endif
()
set
(
CMAKE_C_COMPILER
${
IOS_C_COMPILER
}
CACHE PATH
"C compiler"
FORCE
)
set
(
CMAKE_CXX_COMPILER
${
IOS_CXX_COMPILER
}
CACHE PATH
"CXX compiler"
FORCE
)
set
(
CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG
"-compatibility_version "
)
set
(
CMAKE_C_OSX_CURRENT_VERSION_FLAG
"-current_version "
)
set
(
CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG
"
${
CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG
}
"
)
set
(
CMAKE_CXX_OSX_CURRENT_VERSION_FLAG
"
${
CMAKE_C_OSX_CURRENT_VERSION_FLAG
}
"
)
# Set iOS specific C/C++ flags
if
(
IOS_PLATFORM STREQUAL
"OS"
)
if
(
XCODE_VERSION VERSION_LESS
"7.0"
)
set
(
XCODE_IOS_PLATFORM_VERSION_FLAGS
"-mios-version-min=
${
IOS_DEPLOYMENT_TARGET
}
"
)
else
()
# Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM.
set
(
XCODE_IOS_PLATFORM_VERSION_FLAGS
"-m
${
XCODE_IOS_PLATFORM
}
-version-min=
${
IOS_DEPLOYMENT_TARGET
}
"
)
endif
()
else
()
set
(
XCODE_IOS_FLATFORM_VERSION_FLAGS
"-mios-simulator-version-min=
${
IOS_DEPLOYMENT_TARGET
}
"
)
endif
()
if
(
IOS_ENABLE_BITCODE
)
set
(
XCODE_IOS_BITCODE_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fembed-bitcode"
)
else
()
set
(
XCODE_IOS_BITCODE_FLAGS
""
)
endif
()
set
(
IOS_COMPILER_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
${
XCODE_IOS_BITCODE_FLAGS
}
"
)
# Hidden visibilty is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
if
(
IOS_USE_VECLIB_FOR_BLAS
)
# Find vecLib for iOS
set
(
VECLIB_SEARCH_DIRS
${
IOS_SDK_ROOT
}
/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks
${
IOS_SDK_ROOT
}
/System/Library/Frameworks/Accelerate.framework/Frameworks
)
find_path
(
VECLIB_INC_DIR vecLib.h PATHS
${
VECLIB_SEARCH_DIRS
}
/vecLib.framework/Headers
)
include
(
FindPackageHandleStandardArgs
)
find_package_handle_standard_args
(
vecLib DEFAULT_MSG VECLIB_INC_DIR
)
if
(
VECLIB_FOUND
)
if
(
VECLIB_INC_DIR MATCHES
"^/System/Library/Frameworks/vecLib.framework.*"
)
set
(
IOS_LINK_FLAGS
${
IOS_LINK_FLAGS
}
-lcblas
"-framework vecLib"
)
message
(
STATUS
"Found standalone vecLib.framework"
)
else
()
set
(
IOS_LINK_FLAGS
${
IOS_LINK_FLAGS
}
-lcblas
"-framework Accelerate"
)
message
(
STATUS
"Found vecLib as part of Accelerate.framework"
)
endif
()
endif
()
endif
()
set
(
CMAKE_C_LINK_FLAGS
"
${
IOS_LINK_FLAGS
}
${
CMAKE_C_LINK_FLAGS
}
"
)
set
(
CMAKE_CXX_LINK_FLAGS
"
${
IOS_LINK_FLAGS
}
${
CMAKE_CXX_LINK_FLAGS
}
"
)
set
(
CMAKE_PLATFORM_HAS_INSTALLNAME 1
)
if
(
NOT IOS_ENABLE_BITCODE
)
set
(
CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
"-dynamiclib -headerpad_max_install_names"
)
set
(
CMAKE_SHARED_MODULE_CREATE_C_FLAGS
"-bundle -headerpad_max_install_names"
)
else
()
set
(
CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
"-dynamiclib"
)
set
(
CMAKE_SHARED_MODULE_CREATE_C_FLAGS
"-bundle"
)
endif
()
set
(
CMAKE_SHARED_MODULE_LOADER_C_FLAG
"-Wl,-bundle_loader,"
)
set
(
CMAKE_SHARED_MODULE_LOADER_CXX_FLAG
"-Wl,-bundle_loader,"
)
set
(
CMAKE_FIND_LIBRARY_SUFFIXES
".dylib"
".so"
".a"
)
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree
# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache
# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun)
# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex
if
(
NOT DEFINED CMAKE_INSTALL_NAME_TOOL
)
find_program
(
CMAKE_INSTALL_NAME_TOOL install_name_tool
)
endif
()
# Set the find root to the iOS developer roots and to user defined paths
set
(
CMAKE_FIND_ROOT_PATH
${
IOS_DEVELOPER_ROOT
}
${
IOS_SDK_ROOT
}
${
CMAKE_PREFIX_PATH
}
CACHE string
"iOS find search path root"
)
# default to searching for frameworks first
set
(
CMAKE_FIND_FRAMEWORK FIRST
)
# set up the default search directories for frameworks
set
(
CMAKE_SYSTEM_FRAMEWORK_PATH
${
IOS_SDK_ROOT
}
/System/Library/Frameworks
${
IOS_SDK_ROOT
}
/System/Library/PrivateFrameworks
${
IOS_SDK_ROOT
}
/Developer/Library/Frameworks
)
# only search the iOS sdks, not the remainder of the host filesystem
set
(
CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY
)
message
(
STATUS
"iOS: Targeting iOS '
${
CMAKE_SYSTEM_VERSION
}
', "
"building for '
${
IOS_PLATFORM
}
' platform, with architecture '
${
CMAKE_OSX_ARCHITECTURES
}
'"
)
message
(
STATUS
"System CMAKE_C_FLAGS:
${
CMAKE_C_FLAGS
}
"
)
message
(
STATUS
"System CMAKE_CXX_FLAGS:
${
CMAKE_CXX_FLAGS
}
"
)
# Used in ExternalProject command
string
(
REPLACE
";"
"
\\
$<SEMICOLON>"
EXTERNAL_IOS_ARCHITECTURES
"
${
CMAKE_OSX_ARCHITECTURES
}
"
)
set
(
EXTERNAL_OPTIONAL_ARGS
-DCMAKE_OSX_SYSROOT=
${
CMAKE_OSX_SYSROOT
}
-DCMAKE_OSX_ARCHITECTURES=
${
EXTERNAL_IOS_ARCHITECTURES
}
)
# This little macro lets you set any XCode specific property
macro
(
set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE
)
set_property
(
TARGET
${
TARGET
}
PROPERTY XCODE_ATTRIBUTE_
${
XCODE_PROPERTY
}
${
XCODE_VALUE
}
)
endmacro
(
set_xcode_property
)
# This macro lets you find executable programs on the host system
macro
(
find_host_package
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER
)
set
(
IOS FALSE
)
find_package
(
${
ARGN
}
)
set
(
IOS TRUE
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY
)
set
(
CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY
)
endmacro
(
find_host_package
)
cmake/external/gflags.cmake
浏览文件 @
37faf495
...
...
@@ -39,13 +39,14 @@ ExternalProject_Add(
PREFIX
${
GFLAGS_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
GFLAGS_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
GFLAGS_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
GFLAGS_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
...
...
cmake/external/glog.cmake
浏览文件 @
37faf495
...
...
@@ -34,16 +34,17 @@ ExternalProject_Add(
PREFIX
${
GLOG_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
GLOG_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_INSTALL_LIBDIR=
${
GLOG_INSTALL_DIR
}
/lib
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=
${
GFLAGS_INSTALL_DIR
}
/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
GLOG_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR=
${
GLOG_INSTALL_DIR
}
/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DWITH_GFLAGS=ON
-Dgflags_DIR=
${
GFLAGS_INSTALL_DIR
}
/lib/cmake/gflags
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
GLOG_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR:PATH=
${
GLOG_INSTALL_DIR
}
/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
...
...
cmake/external/gtest.cmake
浏览文件 @
37faf495
...
...
@@ -48,15 +48,16 @@ IF(WITH_TESTING)
PREFIX
${
GTEST_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
GTEST_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_GMOCK=ON
CMAKE_ARGS -Dgtest_disable_pthreads=ON
CMAKE_ARGS -Dgtest_force_shared_crt=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
GTEST_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=Release
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
GTEST_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
...
...
cmake/external/openblas.cmake
浏览文件 @
37faf495
...
...
@@ -29,30 +29,41 @@ IF(NOT ${CBLAS_FOUND})
"
${
CBLAS_INSTALL_DIR
}
/lib/
${
CMAKE_STATIC_LIBRARY_PREFIX
}
openblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"openblas library."
FORCE
)
IF
(
APPLE
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
SET
(
COMMON_ARGS CC=
${
OPENBLAS_CC
}
NO_SHARED=1 NO_LAPACK=1 libs
)
ELSE
()
SET
(
COMMON_ARGS CC=
${
CMAKE_C_COMPILER
}
NO_SHARED=1 NO_LAPACK=1 libs
)
ENDIF
()
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
"
)
IF
(
CMAKE_CROSSCOMPILING
)
SET
(
OPTIONAL_ARGS HOSTCC=
${
HOST_C_COMPILER
}
)
GET_FILENAME_COMPONENT
(
CROSS_SUFFIX
${
CMAKE_C_COMPILER
}
DIRECTORY
)
SET
(
CROSS_SUFFIX
${
CROSS_SUFFIX
}
/
)
IF
(
ANDROID
)
# arm_soft_fp_abi branch of OpenBLAS to support softfp
# https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
IF
(
ANDROID_ABI MATCHES
"^armeabi(-v7a)?$"
)
SET
(
TARGET
"ARMV7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
ANDROID_ABI STREQUAL
"arm64-v8a"
)
SET
(
TARGET
"ARMV8"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ENDIF
()
SET
(
OPTIONAL_ARGS HOSTCC=
${
HOST_C_COMPILER
}
TARGET=
${
TARGET
}
ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
RPI
)
# use hardfp
SET
(
OPENBLAS_COMMIT
"v0.2.20"
)
SET
(
OPTIONAL_ARGS
HOSTCC=
${
HOST_C_COMPILER
}
TARGET=ARMV7 USE_THREAD=0
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 USE_THREAD=0
)
ENDIF
()
ELSE
()
IF
(
APPLE
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
ENDIF
()
SET
(
OPENBLAS_COMMIT
"v0.2.20"
)
SET
(
OPTIONAL_ARGS
""
)
IF
(
CMAKE_SYSTEM_PROCESSOR MATCHES
"^x86(_64)?$"
)
...
...
@@ -60,6 +71,8 @@ IF(NOT ${CBLAS_FOUND})
ENDIF
()
ENDIF
()
SET
(
COMMON_ARGS CC=
${
OPENBLAS_CC
}
NO_SHARED=1 NO_LAPACK=1 libs
)
ExternalProject_Add
(
extern_openblas
${
EXTERNAL_PROJECT_LOG_ARGS
}
...
...
cmake/external/protobuf.cmake
浏览文件 @
37faf495
...
...
@@ -173,7 +173,8 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
"-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
"
"-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
"
"-Dprotobuf_WITH_ZLIB=ON"
"-DZLIB_ROOT:FILEPATH=
${
ZLIB_ROOT
}
"
)
"-DZLIB_ROOT:FILEPATH=
${
ZLIB_ROOT
}
"
${
EXTERNAL_OPTIONAL_ARGS
}
)
SET
(
OPTIONAL_CACHE_ARGS
"-DZLIB_ROOT:STRING=
${
ZLIB_ROOT
}
"
)
ENDIF
()
...
...
cmake/external/python.cmake
浏览文件 @
37faf495
...
...
@@ -12,16 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE
(
ExternalProject
)
IF
(
NOT WITH_PYTHON
)
return
()
ENDIF
()
INCLUDE
(
python_module
)
FIND_PACKAGE
(
PythonInterp 2.7
)
IF
(
WITH_PYTHON
)
FIND_PACKAGE
(
PythonLibs 2.7
)
# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
ADD_LIBRARY
(
python SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET python PROPERTY IMPORTED_LOCATION
${
PYTHON_LIBRARIES
}
)
ENDIF
(
WITH_PYTHON
)
FIND_PACKAGE
(
PythonLibs 2.7
)
# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
ADD_LIBRARY
(
python SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET python PROPERTY IMPORTED_LOCATION
${
PYTHON_LIBRARIES
}
)
SET
(
py_env
""
)
IF
(
PYTHONINTERP_FOUND
)
...
...
@@ -36,9 +37,5 @@ IF(PYTHONINTERP_FOUND)
ENDIF
()
ENDIF
(
PYTHONINTERP_FOUND
)
IF
(
WITH_PYTHON
)
INCLUDE_DIRECTORIES
(
${
PYTHON_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
PYTHON_NUMPY_INCLUDE_DIR
}
)
ELSE
()
SET
(
PYTHON_LIBRARIES
""
)
ENDIF
()
INCLUDE_DIRECTORIES
(
${
PYTHON_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
PYTHON_NUMPY_INCLUDE_DIR
}
)
cmake/external/swig.cmake
浏览文件 @
37faf495
...
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
IF
(
NOT WITH_SWIG_PY
)
return
()
ENDIF
()
FIND_PACKAGE
(
SWIG
)
IF
(
NOT SWIG_FOUND
)
...
...
cmake/external/warpctc.cmake
浏览文件 @
37faf495
...
...
@@ -16,25 +16,14 @@ INCLUDE(ExternalProject)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
SET
(
WARPCTC_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/warpctc
)
SET
(
WARPCTC_INCLUDE_DIR
"
${
WARPCTC_INSTALL_DIR
}
/include"
CACHE PATH
"Warp-ctc Directory"
FORCE
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
IF
(
WIN32
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc.dll"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ELSE
(
WIN32
)
IF
(
APPLE
)
SET
(
_warpctc_SHARED_SUFFIX dylib
)
ELSE
(
APPLE
)
SET
(
_warpctc_SHARED_SUFFIX so
)
ENDIF
(
APPLE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc.
${
_warpctc_SHARED_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ENDIF
(
WIN32
)
SET
(
WARPCTC_INCLUDE_DIR
"
${
WARPCTC_INSTALL_DIR
}
/include"
CACHE PATH
"Warp-ctc Directory"
FORCE
)
# Used in unit test test_WarpCTCLayer
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
)
SET
(
USE_OMP OFF
)
...
...
@@ -49,22 +38,26 @@ ExternalProject_Add(
PREFIX
${
WARPCTC_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
CMAKE_ARGS -DWITH_GPU=
${
WITH_GPU
}
CMAKE_ARGS -DWITH_OMP=
${
USE_OMP
}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
CMAKE_ARGS -DBUILD_SHARED=ON
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_OMP=
${
USE_OMP
}
-DWITH_TORCH=OFF
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
-DBUILD_SHARED=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
ADD_LIBRARY
(
warpctc STATIC IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET warpctc PROPERTY IMPORTED_LOCATION
${
WARPCTC_LIBRARIES
}
)
ADD_DEPENDENCIES
(
warpctc extern_warpctc
)
...
...
cmake/external/zlib.cmake
浏览文件 @
37faf495
...
...
@@ -34,15 +34,16 @@ ExternalProject_Add(
GIT_TAG
"v1.2.8"
PREFIX
${
ZLIB_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
ZLIB_INSTALL_DIR
}
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
-DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
ZLIB_INSTALL_DIR
}
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_MACOSX_RPATH=ON
-DCMAKE_BUILD_TYPE=Release
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
ZLIB_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
...
...
cmake/flags.cmake
浏览文件 @
37faf495
...
...
@@ -128,8 +128,10 @@ set(GPU_COMMON_FLAGS
)
if
(
APPLE
)
# On Mac OS X build fat binaries with x86_64 architectures by default.
set
(
CMAKE_OSX_ARCHITECTURES
"x86_64"
CACHE STRING
"Build architectures for OSX"
FORCE
)
if
(
NOT CMAKE_CROSSCOMPILING
)
# On Mac OS X build fat binaries with x86_64 architectures by default.
set
(
CMAKE_OSX_ARCHITECTURES
"x86_64"
CACHE STRING
"Build architectures for OSX"
FORCE
)
endif
()
else
()
set
(
GPU_COMMON_FLAGS
-Wall
...
...
cmake/system.cmake
浏览文件 @
37faf495
...
...
@@ -24,11 +24,10 @@ IF(WIN32)
SET
(
HOST_SYSTEM
"win32"
)
ELSE
(
WIN32
)
IF
(
APPLE
)
EXEC_PROGRAM
(
sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION
)
STRING
(
REGEX MATCH
"[0-9]+.[0-9]+"
VERSION
"
${
MACOSX_VERSION
}
"
)
SET
(
MACOS_VERSION
${
VERSION
}
)
SET
(
HOST_SYSTEM
"macosx"
)
IF
(
NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET}
)
EXEC_PROGRAM
(
sw_vers ARGS -productVersion OUTPUT_VARIABLE HOST_SYSTEM_VERSION
)
STRING
(
REGEX MATCH
"[0-9]+.[0-9]+"
MACOS_VERSION
"
${
HOST_SYSTEM_VERSION
}
"
)
IF
(
NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET}
)
# Set cache variable - end user may change this during ccmake or cmake-gui configure.
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
${
MACOS_VERSION
}
CACHE STRING
"Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value."
)
...
...
@@ -49,6 +48,8 @@ ELSE(WIN32)
ELSEIF
(
LINUX_ISSUE MATCHES
"Fedora"
)
SET
(
HOST_SYSTEM
"fedora"
)
ENDIF
()
STRING
(
REGEX MATCH
"(([0-9]+)
\\
.)+([0-9]+)"
HOST_SYSTEM_VERSION
"
${
LINUX_ISSUE
}
"
)
ENDIF
(
EXISTS
"/etc/issue"
)
IF
(
EXISTS
"/etc/redhat-release"
)
...
...
@@ -70,7 +71,7 @@ CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
MARK_AS_ADVANCED
(
HOST_SYSTEM CPU_CORES
)
MESSAGE
(
STATUS
"Found Paddle host system:
${
HOST_SYSTEM
}
"
)
MESSAGE
(
STATUS
"Found Paddle host system:
${
HOST_SYSTEM
}
, version:
${
HOST_SYSTEM_VERSION
}
"
)
MESSAGE
(
STATUS
"Found Paddle host system's CPU:
${
CPU_CORES
}
cores"
)
# configuration for cross-compiling
...
...
@@ -82,6 +83,9 @@ IF(DEFINED CMAKE_SYSTEM_NAME)
ELSEIF
(
${
CMAKE_SYSTEM_NAME
}
STREQUAL
"RPi"
)
SET
(
RPI TRUE
)
INCLUDE
(
cross_compiling/raspberry_pi
)
ELSEIF
(
${
CMAKE_SYSTEM_NAME
}
STREQUAL
"iOS"
)
SET
(
IOS TRUE
)
INCLUDE
(
cross_compiling/ios
)
ENDIF
()
ENDIF
()
...
...
cmake/util.cmake
浏览文件 @
37faf495
...
...
@@ -25,7 +25,9 @@ function(target_circle_link_libraries TARGET_NAME)
endif
()
endforeach
()
if
(
"
${
CMAKE_CXX_COMPILER_ID
}
"
STREQUAL
"Clang"
OR
"
${
CMAKE_CXX_COMPILER_ID
}
"
STREQUAL
"AppleClang"
)
list
(
APPEND LIBS
"-undefined dynamic_lookup"
)
if
(
IOS AND NOT IOS_ENABLE_BITCODE
)
list
(
APPEND LIBS
"-undefined dynamic_lookup"
)
endif
()
endif
()
list
(
REVERSE libsInArgn
)
target_link_libraries
(
${
TARGET_NAME
}
...
...
paddle/CMakeLists.txt
浏览文件 @
37faf495
...
...
@@ -19,7 +19,7 @@ if(Boost_FOUND)
endif
()
if
(
WITH_C_API
)
add_subdirectory
(
capi
)
add_subdirectory
(
capi
)
endif
()
if
(
WITH_SWIG_PY
)
...
...
paddle/capi/CMakeLists.txt
浏览文件 @
37faf495
...
...
@@ -28,42 +28,38 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
add_dependencies
(
paddle_capi paddle_proto
)
# combine all paddle static libraries together, into libpaddle_capi_whole.a
# user should use PaddleCAPI as -lpaddle_capi_whole
set
(
capi_whole_library libpaddle_capi_whole.a
)
add_custom_target
(
paddle_capi_whole ALL
COMMAND mkdir -p o_files/capi && cd o_files/capi/ && ar -x $<TARGET_FILE:paddle_capi>
COMMAND mkdir -p o_files/utils && cd o_files/utils/ && ar -x $<TARGET_FILE:paddle_utils>
COMMAND mkdir -p o_files/parameter && cd o_files/parameter/ && ar -x $<TARGET_FILE:paddle_parameter>
COMMAND mkdir -p o_files/math && cd o_files/math/ && ar -x $<TARGET_FILE:paddle_math>
COMMAND mkdir -p o_files/cuda && cd o_files/cuda/ && ar -x $<TARGET_FILE:paddle_cuda>
COMMAND mkdir -p o_files/function && cd o_files/function/ && ar -x $<TARGET_FILE:paddle_function>
COMMAND mkdir -p o_files/gserver && cd o_files/gserver/ && ar -x $<TARGET_FILE:paddle_gserver>
COMMAND mkdir -p o_files/proto && cd o_files/proto/ && ar -x $<TARGET_FILE:paddle_proto>
COMMAND mkdir -p o_files/network && cd o_files/network/ && ar -x $<TARGET_FILE:paddle_network>
COMMAND mkdir -p o_files/pserver && cd o_files/pserver/ && ar -x $<TARGET_FILE:paddle_pserver>
COMMAND ar crs
${
capi_whole_library
}
`find ./o_files -name '*.o'`
COMMAND rm -rf o_files
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
DEPENDS paddle_capi paddle_utils paddle_parameter paddle_math
paddle_cuda paddle_function paddle_gserver
paddle_proto paddle_pserver paddle_network
)
set_target_properties
(
paddle_capi_whole
PROPERTIES IMPORTED_LOCATION
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
capi_whole_library
}
)
set
(
PADDLE_CAPI_INFER_LIBS
paddle_utils
paddle_parameter
paddle_math
paddle_cuda
paddle_function
paddle_gserver
paddle_proto
paddle_pserver
paddle_network
)
cc_library
(
paddle_capi_whole DEPS paddle_capi
${
PADDLE_CAPI_INFER_LIBS
}
)
set
(
LINK_FLAGS
" -Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/export.sym -Wl,--version-script
${
CMAKE_CURRENT_SOURCE_DIR
}
/export.map"
)
# TODO: merge mkl into paddle_capi_shared
add_library
(
paddle_capi_shared SHARED
${
CAPI_SOURCES
}
)
set_target_properties
(
paddle_capi_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
target_include_directories
(
paddle_capi_shared PUBLIC
${
CMAKE_CURRENT_BINARY_DIR
}
)
link_paddle_exe
(
paddle_capi_shared
)
# No shared library for iOS
if
(
NOT IOS
)
set
(
LINK_FLAGS
" -Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/export.sym -Wl,--version-script
${
CMAKE_CURRENT_SOURCE_DIR
}
/export.map"
)
# TODO: merge mkl into paddle_capi_shared
add_library
(
paddle_capi_shared SHARED
${
CAPI_SOURCES
}
)
set_target_properties
(
paddle_capi_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
target_include_directories
(
paddle_capi_shared PUBLIC
${
CMAKE_CURRENT_BINARY_DIR
}
)
link_paddle_exe
(
paddle_capi_shared
)
endif
()
# install library & headers.
install
(
FILES
${
CAPI_HEADERS
}
DESTINATION include/paddle
)
install
(
FILES
${
CMAKE_CURRENT_BINARY_DIR
}
/config.h DESTINATION include/paddle
)
if
(
ANDROID
)
install
(
TARGETS paddle_capi_whole paddle_capi_shared
ARCHIVE DESTINATION lib/
${
ANDROID_ABI
}
LIBRARY DESTINATION lib/
${
ANDROID_ABI
}
)
execute_process
(
COMMAND
${
GIT_EXECUTABLE
}
log --pretty=oneline -1
OUTPUT_VARIABLE GIT_COMMITS_LIST
...
...
@@ -72,9 +68,6 @@ if(ANDROID)
if
(
${
GIT_COMMITS_LIST_RESULT
}
)
set
(
GIT_COMMITS_LIST
"No commits."
)
endif
()
install
(
FILES
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
capi_whole_library
}
DESTINATION lib/
${
ANDROID_ABI
}
)
install
(
TARGETS paddle_capi_shared DESTINATION lib/
${
ANDROID_ABI
}
)
install
(
CODE
"FILE(WRITE
${
CMAKE_INSTALL_PREFIX
}
/lib/
${
ANDROID_ABI
}
/BUILD.txt
\"
Compiler:
\n\"
\"\\
t
${
CMAKE_C_COMPILER
}
\\
n
\"
...
...
@@ -88,8 +81,11 @@ if(ANDROID)
)"
)
else
(
ANDROID
)
install
(
FILES
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
capi_whole_library
}
DESTINATION lib
)
install
(
TARGETS paddle_capi_shared DESTINATION lib
)
install
(
TARGETS paddle_capi_whole
ARCHIVE DESTINATION lib
)
if
(
NOT IOS
)
install
(
TARGETS paddle_capi_shared DESTINATION lib
)
endif
()
endif
(
ANDROID
)
# this variable used for unittest
...
...
paddle/function/neon/NeonDepthwiseConv.cpp
浏览文件 @
37faf495
...
...
@@ -52,7 +52,7 @@ public:
int
outputHeight
=
output
[
2
];
int
outputWidth
=
output
[
3
];
int
filterMultiplier
=
outputChannels
/
groups_
;
CHECK_EQ
(
inputChannels
,
groups_
);
CHECK_EQ
(
static_cast
<
size_t
>
(
inputChannels
)
,
groups_
);
// only support strideH() == strideW() and filterHeight == filterWidth.
CHECK_EQ
(
strideH
(),
strideW
());
...
...
paddle/gserver/activations/ActivationFunction.cpp
浏览文件 @
37faf495
...
...
@@ -22,9 +22,12 @@ limitations under the License. */
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_USE_MKLDNN
#include "MKLDNNActivation.h"
#endif
namespace
paddle
{
static
ClassRegistrar
<
ActivationFunction
>
gActivationRegistrar
;
...
...
@@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) {
END_DEFINE_ACTIVATION
(
log
)
ActivationFunction
*
ActivationFunction
::
create
(
const
std
::
string
&
type
)
{
#ifdef PADDLE_USE_MKLDNN
if
(
!
type
.
empty
()
&&
type
.
compare
(
0
,
7
,
"mkldnn_"
)
==
0
)
{
return
MKLDNNActivation
::
create
(
type
);
}
#endif
return
gActivationRegistrar
.
createByType
(
type
);
}
...
...
paddle/gserver/activations/MKLDNNActivation.cpp
0 → 100644
浏览文件 @
37faf495
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNActivation.h"
#include "mkldnn.hpp"
#include "paddle/utils/ClassRegistrar.h"
namespace
paddle
{
static
ClassRegistrar
<
ActivationFunction
>
gMKLDNNActivationRegistrar
;
/**
* @def MKLDNN_ACTIVATION_CLASS_NAME
* @note MKLDNN_ACTIVATION_CLASS_NAME(relu) relu_;
* means mkldnn_reluActivation relu_;
*/
#define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation
/**
* @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
*/
#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \
class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \
: public MKLDNNEltwiseActivation { \
private: \
static const std::string name; \
static const float alpha; \
static const float bwdAlpha; \
\
public: \
const std::string& getName() const { return name; } \
float getAlpha() const { return alpha; } \
float getBwdAlpha() const { return bwdAlpha; } \
}; \
const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \
"mkldnn_" #ACT_TYPE; \
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \
gMKLDNNActivationRegistrar \
.registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>( \
"mkldnn_" #ACT_TYPE); \
});
/**
* @brief MKLDNN Relu Activation.
* Actually mkldnn_relu is Leaky Relu.
* f(x) = x (x >= 0)
* f(x) = negative_slope * x (x < 0)
* @note the negative_slope should be -0.f in forward
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION
(
relu
,
-
0.
f
,
0.
f
)
/**
* @brief MKLDNN Tanh Activation.
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION
(
tanh
,
0.
f
,
0.
f
)
/**
* @brief MKLDNN ELU(Exponential Linear Unit) Activation.
* f(x) = x (x >= 0)
* f(x) = negative_slope * (exp(x) - 1) (x < 0)
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION
(
elu
,
0.
f
,
0.
f
)
ActivationFunction
*
MKLDNNActivation
::
create
(
const
std
::
string
&
type
)
{
return
gMKLDNNActivationRegistrar
.
createByType
(
type
);
}
std
::
vector
<
std
::
string
>
MKLDNNActivation
::
getAllRegisteredTypes
()
{
std
::
vector
<
std
::
string
>
types
;
gMKLDNNActivationRegistrar
.
forEachType
(
[
&
](
const
std
::
string
&
type
)
{
types
.
push_back
(
type
);
});
return
types
;
}
}
// namespace paddle
paddle/gserver/activations/MKLDNNActivation.h
0 → 100644
浏览文件 @
37faf495
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ActivationFunction.h"
#include "mkldnn.hpp"
#include "paddle/gserver/layers/MKLDNNBase.h"
#include "paddle/math/MKLDNNMatrix.h"
#include "paddle/parameter/Argument.h"
namespace
paddle
{
/**
* @brief Base class of MKLDNN Activation.
* Common activation function are provieded,
* including mkldnn_relu, mkldnn_elu, mkldnn_tanh, mkldnn_softmax
*/
class
MKLDNNActivation
:
public
ActivationFunction
{
protected:
// input value element count
size_t
cnt_
;
// should not merge the resetBwd into resetFwd,
// because the grad data would be changing before backward.
bool
needResetBwd_
;
// mkldnn matrix, primitive, stream and pipeline
MKLDNNMatrixPtr
val_
;
MKLDNNMatrixPtr
grad_
;
std
::
shared_ptr
<
MKLDNNStream
>
stream_
;
std
::
shared_ptr
<
mkldnn
::
primitive
>
fwd_
;
std
::
shared_ptr
<
mkldnn
::
primitive
>
bwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
public:
MKLDNNActivation
()
:
cnt_
(
0
),
needResetBwd_
(
true
)
{}
~
MKLDNNActivation
()
{}
static
ActivationFunction
*
create
(
const
std
::
string
&
type
);
static
std
::
vector
<
std
::
string
>
getAllRegisteredTypes
();
virtual
const
std
::
string
&
getName
()
const
=
0
;
virtual
Error
__must_check
forward
(
Argument
&
act
)
=
0
;
virtual
Error
__must_check
backward
(
Argument
&
act
)
=
0
;
};
/**
* @brief Base class of MKLDNN Eltwise Activation,
* includes mkldnn_relu, mkldnn_elu and mkldnn_tanh.
*/
class
MKLDNNEltwiseActivation
:
public
MKLDNNActivation
{
typedef
mkldnn
::
eltwise_forward
eltwise_fwd
;
typedef
mkldnn
::
eltwise_backward
eltwise_bwd
;
protected:
// save the forward primitive desc, which can be used backward
std
::
shared_ptr
<
eltwise_fwd
::
primitive_desc
>
fwdPD_
;
// eltwise_bwd need src input value
MKLDNNMatrixPtr
inVal_
;
// use for copy data
std
::
shared_ptr
<
mkldnn
::
reorder
>
copyInVal_
;
public:
MKLDNNEltwiseActivation
()
{}
~
MKLDNNEltwiseActivation
()
{}
virtual
const
std
::
string
&
getName
()
const
=
0
;
// in common, the alpha of forward and backward should be equal.
// but for relu, to avoid negative value, they should be opposite
virtual
float
getAlpha
()
const
=
0
;
virtual
float
getBwdAlpha
()
const
=
0
;
virtual
float
getBeta
()
const
{
return
0.
f
;
}
virtual
mkldnn
::
algorithm
getAlgo
(
const
std
::
string
&
type
)
const
{
if
(
type
==
"mkldnn_relu"
)
{
return
mkldnn
::
algorithm
::
eltwise_relu
;
}
else
if
(
type
==
"mkldnn_tanh"
)
{
return
mkldnn
::
algorithm
::
eltwise_tanh
;
}
else
if
(
type
==
"mkldnn_elu"
)
{
return
mkldnn
::
algorithm
::
eltwise_elu
;
}
else
{
LOG
(
FATAL
)
<<
"Unkown eltwise activation type: "
<<
type
;
}
return
(
mkldnn
::
algorithm
)
0
;
}
/**
* reshape and reset the forward primitives
*/
void
resetFwd
(
Argument
&
act
)
{
if
(
cnt_
==
act
.
value
->
getElementCnt
())
{
return
;
}
cnt_
=
act
.
value
->
getElementCnt
();
stream_
.
reset
(
new
MKLDNNStream
());
auto
eng
=
CPUEngine
::
Instance
().
getEngine
();
// get algo setting
mkldnn
::
algorithm
algo
=
getAlgo
(
this
->
getName
());
// note: alpha represents the NegativeSlope when used in relu.
float
alpha
=
getAlpha
();
float
beta
=
getBeta
();
/// forward
pipelineFwd_
.
clear
();
val_
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
act
.
value
);
if
(
val_
==
nullptr
)
{
int
bs
=
act
.
getBatchSize
();
int
ih
=
act
.
getFrameHeight
()
>
0
?
act
.
getFrameHeight
()
:
1
;
int
iw
=
act
.
getFrameWidth
()
>
0
?
act
.
getFrameWidth
()
:
1
;
int
ic
=
cnt_
/
bs
/
ih
/
iw
;
CHECK_EQ
(
cnt_
,
(
size_t
)
bs
*
ic
*
ih
*
iw
);
val_
=
MKLDNNMatrix
::
create
(
act
.
value
,
{
bs
,
ic
,
ih
,
iw
},
mkldnn
::
memory
::
format
::
nchw
,
eng
);
CHECK
(
val_
);
}
auto
fwdDesc
=
eltwise_fwd
::
desc
(
mkldnn
::
prop_kind
::
forward_training
,
algo
,
val_
->
getMemoryDesc
(),
alpha
,
beta
);
fwdPD_
.
reset
(
new
eltwise_fwd
::
primitive_desc
(
fwdDesc
,
eng
));
// use inplace for forward but save input value before submit
inVal_
=
val_
;
if
(
act
.
grad
)
{
// only copy when need do backward
inVal_
=
MKLDNNMatrix
::
create
(
nullptr
,
val_
->
getPrimitiveDesc
());
copyInVal_
=
std
::
make_shared
<
mkldnn
::
reorder
>
(
*
val_
,
*
inVal_
);
CHECK
(
copyInVal_
)
<<
"should not be emptry"
;
pipelineFwd_
.
push_back
(
*
copyInVal_
);
}
fwd_
.
reset
(
new
eltwise_fwd
(
*
fwdPD_
,
*
val_
,
*
val_
));
pipelineFwd_
.
push_back
(
*
fwd_
);
needResetBwd_
=
true
;
}
/**
* reset the backward primitives, can not merge into resetFwd as the grad data
* would be changing before backward.
*/
void
resetBwd
(
Argument
&
act
)
{
if
(
!
needResetBwd_
)
{
return
;
}
needResetBwd_
=
false
;
mkldnn
::
algorithm
algo
=
getAlgo
(
this
->
getName
());
float
alpha
=
getBwdAlpha
();
float
beta
=
getBeta
();
grad_
=
MKLDNNMatrix
::
create
(
act
.
grad
,
val_
->
getPrimitiveDesc
());
auto
eng
=
CPUEngine
::
Instance
().
getEngine
();
auto
bwdDesc
=
eltwise_bwd
::
desc
(
algo
,
grad_
->
getMemoryDesc
(),
val_
->
getMemoryDesc
(),
alpha
,
beta
);
auto
bwdPD
=
eltwise_bwd
::
primitive_desc
(
bwdDesc
,
eng
,
*
fwdPD_
);
CHECK
(
inVal_
);
bwd_
.
reset
(
new
eltwise_bwd
(
bwdPD
,
*
inVal_
,
*
grad_
,
*
grad_
));
pipelineBwd_
.
clear
();
pipelineBwd_
.
push_back
(
*
bwd_
);
}
Error
__must_check
forward
(
Argument
&
act
)
{
resetFwd
(
act
);
stream_
->
submit
(
pipelineFwd_
);
return
Error
();
}
Error
__must_check
backward
(
Argument
&
act
)
{
resetBwd
(
act
);
stream_
->
submit
(
pipelineBwd_
);
return
Error
();
}
};
}
// namespace paddle
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
37faf495
...
...
@@ -294,12 +294,9 @@ void MKLDNNConvLayer::resetOutValue(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
pd
->
dst_primitive_desc
());
// change original output value from cpu matrix to mkldnn matrix
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
c
pu
OutVal_
=
nullptr
;
c
vt
OutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
37faf495
...
...
@@ -172,12 +172,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert
// just share point
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
put_
.
value
->
getData
());
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
());
}
}
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
37faf495
...
...
@@ -119,6 +119,10 @@ public:
inputElemenCnt_
=
elemenCnt
;
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
if
(
outVal_
)
{
// change original output value to mkldnn output value
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
}
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
37faf495
...
...
@@ -134,7 +134,6 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
outDims
,
inVal_
->
getFormat
(),
engine_
);
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
...
...
paddle/gserver/layers/SequenceSliceLayer.cpp
浏览文件 @
37faf495
...
...
@@ -73,9 +73,10 @@ void SequenceSliceLayer::checkInputs() {
CHECK
(
inputSeq
.
hasSeq
())
<<
"The first input of sequence slice layer "
<<
"must be a sequence."
;
const
MatrixPtr
indices1
=
getInputValue
(
1
);
CHECK_EQ
(
static_cast
<
size_t
>
(
indices1
->
getHeight
()),
inputSeq
.
hasSubseq
()
?
inputSeq
.
getNumSubSequences
()
:
inputSeq
.
getNumSequences
())
CHECK_EQ
(
indices1
->
getHeight
(),
static_cast
<
size_t
>
(
inputSeq
.
hasSubseq
()
?
inputSeq
.
getNumSubSequences
()
:
inputSeq
.
getNumSequences
()))
<<
"Height of the second input should be equal to number of sequence "
<<
"in the first input."
;
if
(
inputLayers_
.
size
()
==
3
)
{
...
...
@@ -151,7 +152,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
if
(
ends
)
endPos
=
inputSeqInfoVec_
[
i
][
j
]
+
ends
->
getElement
(
rowIdx
,
k
);
int
seqLen
=
endPos
-
begPos
+
1
;
CHECK_GT
(
seqLen
,
0
U
);
CHECK_GT
(
seqLen
,
0
);
for
(
int
m
=
begPos
;
m
<=
endPos
;
++
m
)
selectedRows_
.
push_back
(
m
);
hasSubseq
?
outSubSeqStartPos_
.
push_back
(
outSubSeqStartPos_
.
back
()
+
seqLen
)
...
...
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
37faf495
...
...
@@ -64,15 +64,17 @@ void MKLDNNTester::reset(const TestConfig& dnn,
configs_
[
i
],
&
(
layerMaps_
[
i
]),
&
(
parameters_
[
i
]),
&
(
testLayers_
[
i
]));
}
refLayer_
=
testLayers_
[
REF
];
dnnLayer_
=
std
::
dynamic_pointer_cast
<
MKLDNNLayer
>
(
testLayers_
[
DNN
]);
CHECK
(
dnnLayer_
);
// for comparison with Paddle reference results,
// need manually add cpu device output for test
dnnLayer_
->
addOutputArgument
(
CPU_DEVICE
);
dnnLayer_
=
testLayers_
[
DNN
];
EXPECT_EQ
(
dataLayers_
[
DNN
].
size
(),
dataLayers_
[
REF
].
size
());
EXPECT_EQ
(
parameters_
[
DNN
].
size
(),
parameters_
[
REF
].
size
());
setInputImgSize
();
// for comparison with Paddle reference results,
// need manually add cpu device output for test
MKLDNNLayerPtr
dnnLayer
=
std
::
dynamic_pointer_cast
<
MKLDNNLayer
>
(
dnnLayer_
);
if
(
dnnLayer
)
{
dnnLayer
->
addOutputArgument
(
CPU_DEVICE
);
}
}
void
MKLDNNTester
::
setInputImgSize
()
{
...
...
@@ -122,7 +124,7 @@ void MKLDNNTester::randomTopDiffs() {
void
MKLDNNTester
::
checkForward
()
{
VLOG
(
MKLDNN_ALL
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutput
(
-
1
).
value
,
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutput
(
CPU_DEVICE
).
value
,
refLayer_
->
getOutputValue
());
EXPECT_LE
(
fabs
(
delta
),
eps_
);
}
...
...
@@ -155,7 +157,10 @@ void MKLDNNTester::checkBackwardWgts() {
vector
<
VectorPtr
>
dnnWgts
;
// used to temply save mkldnn weights
saveWgt
(
parameters_
[
DNN
],
dnnWgts
);
dnnLayer_
->
convertWeightsToPaddle
();
MKLDNNLayerPtr
dnnLayer
=
std
::
dynamic_pointer_cast
<
MKLDNNLayer
>
(
dnnLayer_
);
if
(
dnnLayer
)
{
dnnLayer
->
convertWeightsToPaddle
();
}
for
(
size_t
i
=
0
;
i
<
parameters_
[
DNN
].
size
();
++
i
)
{
const
VectorPtr
&
dnn
=
parameters_
[
DNN
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
const
VectorPtr
&
ref
=
parameters_
[
REF
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
...
...
@@ -322,6 +327,10 @@ void MKLDNNTester::runOnce() {
// and clearTopDatas(REF) should be coverd by ref layers
clearBotDiffs
(
REF
);
clearWgtDiffs
(
REF
);
// it is necessary to clear bottom diffs when only activation is dnn type
if
(
configs_
[
DNN
].
layerConfig
.
active_type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
)
{
clearBotDiffs
(
DNN
);
}
}
void
MKLDNNTester
::
run
(
const
TestConfig
&
dnn
,
...
...
@@ -333,8 +342,19 @@ void MKLDNNTester::run(const TestConfig& dnn,
float
epsilon
,
bool
log
,
int
level
)
{
VLOG
(
MKLDNN_TESTS
)
<<
"Test MKLDNN functionality: "
<<
dnn
.
layerConfig
.
type
()
<<
" vs "
<<
ref
.
layerConfig
.
type
();
CHECK
(
dnn
.
layerConfig
.
type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
||
dnn
.
layerConfig
.
active_type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
)
<<
"should be MKLDNN layer or MKLDNN activation"
;
if
(
dnn
.
layerConfig
.
type
()
==
ref
.
layerConfig
.
type
())
{
VLOG
(
MKLDNN_TESTS
)
<<
"Test MKLDNN functionality: "
<<
dnn
.
layerConfig
.
active_type
()
<<
" vs "
<<
ref
.
layerConfig
.
active_type
();
}
else
{
VLOG
(
MKLDNN_TESTS
)
<<
"Test MKLDNN functionality: "
<<
dnn
.
layerConfig
.
type
()
<<
" vs "
<<
ref
.
layerConfig
.
type
();
}
ih_
=
inputImgH
;
iw_
=
inputImgW
;
iter_
=
iter
;
...
...
paddle/gserver/tests/MKLDNNTester.h
浏览文件 @
37faf495
...
...
@@ -41,8 +41,7 @@ protected:
vector
<
LayerMap
>
layerMaps_
;
vector
<
vector
<
ParameterPtr
>>
parameters_
;
vector
<
LayerPtr
>
testLayers_
;
LayerPtr
refLayer_
;
MKLDNNLayerPtr
dnnLayer_
;
LayerPtr
refLayer_
,
dnnLayer_
;
/// run some iterations, all the result should pass
size_t
iter_
;
...
...
paddle/gserver/tests/test_MKLDNN.cpp
浏览文件 @
37faf495
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <vector>
#include "MKLDNNTester.h"
#include "ModelConfig.pb.h"
#include "paddle/gserver/activations/MKLDNNActivation.h"
#include "paddle/math/MathUtils.h"
using
namespace
paddle
;
// NOLINT
...
...
@@ -162,7 +163,6 @@ void testPoolLayer(const testPoolDesc& pm) {
0
});
LayerInputConfig
*
input
=
cfg
.
layerConfig
.
add_inputs
();
PoolConfig
*
pool
=
input
->
mutable_pool_conf
();
// pool->set_pool_type(poolType);
pool
->
set_channels
(
pm
.
ch
);
pool
->
set_img_size
(
pm
.
iw
);
pool
->
set_img_size_y
(
pm
.
ih
);
...
...
@@ -191,7 +191,7 @@ void testPoolLayer(const testPoolDesc& pm) {
}
}
TEST
(
M
kldnn
Layer
,
PoolLayer
)
{
TEST
(
M
KLDNN
Layer
,
PoolLayer
)
{
/* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw*/
testPoolLayer
({
2
,
1
,
4
,
4
,
2
,
2
,
3
,
3
,
0
,
0
,
2
,
2
});
testPoolLayer
({
10
,
8
,
16
,
16
,
8
,
8
,
2
,
2
,
0
,
0
,
2
,
2
});
...
...
@@ -203,6 +203,49 @@ TEST(MkldnnLayer, PoolLayer) {
testPoolLayer
({
2
,
8
,
56
,
56
,
29
,
29
,
3
,
3
,
1
,
1
,
2
,
2
});
}
struct
testActDesc
{
int
bs
,
ch
;
int
ih
,
iw
;
};
static
void
getAddtoConfig
(
TestConfig
&
cfg
,
const
testActDesc
&
pm
)
{
cfg
.
biasSize
=
0
;
cfg
.
layerConfig
.
set_type
(
"addto"
);
cfg
.
layerConfig
.
set_size
(
pm
.
ch
*
pm
.
ih
*
pm
.
iw
);
cfg
.
inputDefs
.
push_back
(
{
INPUT_DATA
,
"layer_0"
,
/* size of input layer= */
size_t
(
pm
.
ch
*
pm
.
ih
*
pm
.
iw
),
0
});
cfg
.
layerConfig
.
add_inputs
();
}
void
testActivation
(
std
::
string
&
type
,
const
testActDesc
&
pm
)
{
const
std
::
string
compareTypes
[]
=
{
type
,
type
.
erase
(
0
,
7
)};
TestConfig
cfg
;
getAddtoConfig
(
cfg
,
pm
);
TestConfig
ref
=
cfg
;
cfg
.
layerConfig
.
set_active_type
(
compareTypes
[
0
]);
ref
.
layerConfig
.
set_active_type
(
compareTypes
[
1
]);
MKLDNNTester
tester
;
for
(
auto
bs
:
{
pm
.
bs
,
1
})
{
tester
.
run
(
cfg
,
ref
,
bs
,
pm
.
ih
,
pm
.
iw
);
}
}
TEST
(
MKLDNNActivation
,
Activations
)
{
auto
types
=
MKLDNNActivation
::
getAllRegisteredTypes
();
// TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation
std
::
set
<
string
>
excluded
{
"mkldnn_softmax"
,
"mkldnn_elu"
};
for
(
auto
type
:
types
)
{
if
(
excluded
.
count
(
type
))
{
continue
;
}
testActivation
(
type
,
{
16
,
64
,
32
,
32
});
}
}
// TODO(TJ): add branch test
int
main
(
int
argc
,
char
**
argv
)
{
...
...
paddle/math/MathFunctions.h
浏览文件 @
37faf495
...
...
@@ -26,7 +26,7 @@ limitations under the License. */
#include <mkl_lapacke.h>
#endif
#if
def PADDLE_USE_ATLAS
#if
defined(PADDLE_USE_ATLAS) || defined(PADDLE_USE_VECLIB)
extern
"C"
{
#include <cblas.h>
#include <clapack.h>
...
...
paddle/operators/cross_entropy_op.cc
0 → 100644
浏览文件 @
37faf495
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/cross_entropy_op.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
LoDTensor
;
class
CrossEntropyOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"Label"
),
"Input(Label) must not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
OutputVar
(
"Y"
),
"Output(Y) must not be null."
);
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
PADDLE_ENFORCE_EQ
(
x
->
dims
().
size
(),
2
,
"Input(X)'s rank must be 2."
);
PADDLE_ENFORCE_EQ
(
label
->
dims
().
size
(),
2
,
"Input(Label)'s rank must be 2."
);
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
0
||
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
);
PADDLE_ENFORCE_EQ
(
x
->
dims
()[
0
],
label
->
dims
()[
0
],
"The 1st dimension of Input(X) and Input(Label) must "
"be equal."
);
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
PADDLE_ENFORCE_EQ
(
x
->
dims
()[
1
],
label
->
dims
()[
1
],
"If Attr(soft_label) == 1, The 2nd dimension of "
"Input(X) and Input(Label) must be equal."
);
}
else
{
PADDLE_ENFORCE_EQ
(
label
->
dims
()[
1
],
1
,
"If Attr(soft_label) == 0, The 2nd dimension of "
"Input(Label) must be 1."
);
}
ctx
.
Output
<
LoDTensor
>
(
"Y"
)
->
Resize
({
x
->
dims
()[
0
],
1
});
}
};
class
CrossEntropyGradientOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"Label"
),
"Input(Label) must not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
framework
::
GradVarName
(
"Y"
)),
"Input(Y@GRAD) must not be null."
);
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
dy
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
PADDLE_ENFORCE_EQ
(
x
->
dims
().
size
(),
2
,
"Input(X)'s rank must be 2."
);
PADDLE_ENFORCE_EQ
(
dy
->
dims
().
size
(),
2
,
"Input(Y@Grad)'s rank must be 2."
);
PADDLE_ENFORCE_EQ
(
label
->
dims
().
size
(),
2
,
"Input(Label)'s rank must be 2."
);
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
0
||
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
);
PADDLE_ENFORCE_EQ
(
x
->
dims
()[
0
],
label
->
dims
()[
0
],
"The 1st dimension of Input(X) and Input(Label) must "
"be equal."
);
PADDLE_ENFORCE_EQ
(
x
->
dims
()[
0
],
dy
->
dims
()[
0
],
"The 1st dimension of Input(X) and Input(Y@Grad) must "
"be equal."
);
PADDLE_ENFORCE_EQ
(
dy
->
dims
()[
1
],
1
,
"The 2nd dimension of Input(Y@Grad) must be 1."
);
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
PADDLE_ENFORCE_EQ
(
x
->
dims
()[
1
],
label
->
dims
()[
1
],
"If Attr(soft_label) == 1, The 2nd dimension of "
"Input(X) and Input(Label) must be equal."
);
}
else
{
PADDLE_ENFORCE_EQ
(
label
->
dims
()[
1
],
1
,
"If Attr(soft_label) == 0, The 2nd dimension of "
"Input(Label) must be 1."
);
}
auto
dx
=
ctx
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
dx
->
Resize
(
x
->
dims
());
}
};
class
CrossEntropyOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
CrossEntropyOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The first input of CrossEntropyOp"
);
AddInput
(
"Label"
,
"The second input of CrossEntropyOp"
);
AddOutput
(
"Y"
,
"The output of CrossEntropyOp"
);
AddAttr
<
int
>
(
"soft_label"
,
"Is soft label. Default zero."
).
SetDefault
(
0
);
AddComment
(
R"DOC(
CrossEntropy Operator.
It supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy:
soft_label = 0, Label[i, 0] indicates the class index for sample i:
Y[i] = -log(X[i, Label[i]])
2) Soft-label cross-entropy:
soft_label = 1, Label[i, j] indicates the soft label of class j
for sample i:
Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
Please make sure that in this case the summuation of each row of Label
equals one.
3) One-hot cross-entropy with vecterized Input(Label):
As a special case of 2), when each row of Input(Label) has only one
non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation.
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
cross_entropy
,
ops
::
CrossEntropyOp
,
ops
::
CrossEntropyOpMaker
,
cross_entropy_grad
,
ops
::
CrossEntropyGradientOp
);
REGISTER_OP_CPU_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOpKernel
<
float
>
);
paddle/operators/
onehot_
cross_entropy_op.cu
→
paddle/operators/cross_entropy_op.cu
浏览文件 @
37faf495
...
...
@@ -13,27 +13,13 @@
limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/hostdevice.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
>
__host__
__device__
T
clipping_log
(
const
T
x
)
{
PADDLE_ASSERT
(
std
::
is_floating_point
<
T
>::
value
);
const
T
kApproInf
=
1e20
;
T
v
=
log
(
x
);
if
(
v
==
INFINITY
)
{
return
kApproInf
;
}
if
(
v
==
-
INFINITY
)
{
return
-
kApproInf
;
}
return
v
;
}
template
<
typename
T
>
__global__
void
CrossEntropyKernel
(
T
*
Y
,
const
T
*
X
,
const
int
*
label
,
const
int
N
,
const
int
D
)
{
...
...
@@ -42,7 +28,20 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
N
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
PADDLE_ASSERT
(
label
[
i
]
>=
0
&&
label
[
i
]
<
D
);
Y
[
i
]
=
-
clipping_log
(
X
[
i
*
D
+
label
[
i
]]);
Y
[
i
]
=
-
tolerable_value
(
log
(
X
[
i
*
D
+
label
[
i
]]));
}
}
template
<
typename
T
>
__global__
void
SoftCrossEntropyKernel
(
T
*
Y
,
const
T
*
X
,
const
T
*
label
,
const
int
N
,
const
int
D
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
N
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
sum
=
static_cast
<
T
>
(
0
);
for
(
int
j
=
0
;
j
<
D
;
j
++
)
{
sum
+=
label
[
i
*
D
+
j
]
*
tolerable_value
(
log
(
X
[
i
*
D
+
j
]));
}
Y
[
i
]
=
-
sum
;
}
}
...
...
@@ -69,57 +68,84 @@ __global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
}
template
<
typename
T
>
class
OnehotCrossEntropyOpCUDAKernel
:
public
framework
::
OpKernel
{
__global__
void
SoftCrossEntropyGradientKernel
(
T
*
dX
,
const
T
*
dY
,
const
T
*
X
,
const
T
*
label
,
const
int
N
,
const
int
D
)
{
// TOOD(qingqing): optimize for this kernel
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
N
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
j
=
0
;
j
<
D
;
++
j
)
{
int
idx
=
i
*
D
+
j
;
dX
[
idx
]
=
-
label
[
idx
]
*
dY
[
i
]
/
X
[
idx
];
}
}
}
template
<
typename
T
>
class
CrossEntropyOpCUDAKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"It must use GPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
T
*
Xdata
=
X
->
data
<
T
>
();
const
int
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"label"
)
->
data
<
int
>
();
auto
Y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
Y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
Ydata
=
Y
->
data
<
T
>
();
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
auto
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
int
N
=
X
->
dims
()[
0
];
int
D
=
X
->
dims
()[
1
];
auto
*
x_data
=
x
->
data
<
T
>
();
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
y_data
=
y
->
data
<
T
>
();
int
n
=
x
->
dims
()[
0
];
int
d
=
x
->
dims
()[
1
];
int
block
=
512
;
int
grid
=
(
N
+
block
-
1
)
/
block
;
int
grid
=
(
n
+
block
-
1
)
/
block
;
// TODO(qingqing) launch kernel on specified stream
// base on ExecutionContext.
CrossEntropyKernel
<
T
><<<
grid
,
block
>>>
(
Ydata
,
Xdata
,
label_data
,
N
,
D
);
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
auto
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"Label"
)
->
data
<
T
>
();
SoftCrossEntropyKernel
<
T
><<<
grid
,
block
>>>
(
y_data
,
x_data
,
label_data
,
n
,
d
);
}
else
{
auto
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"Label"
)
->
data
<
int
>
();
CrossEntropyKernel
<
T
><<<
grid
,
block
>>>
(
y_data
,
x_data
,
label_data
,
n
,
d
);
}
}
};
template
<
typename
T
>
class
Onehot
CrossEntropyGradientOpCUDAKernel
:
public
framework
::
OpKernel
{
class
CrossEntropyGradientOpCUDAKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"It must use GPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
d
Y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
label
=
ctx
.
Input
<
Tensor
>
(
"
l
abel"
);
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
d
x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
d
y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
label
=
ctx
.
Input
<
Tensor
>
(
"
L
abel"
);
auto
*
dXdata
=
dX
->
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
auto
*
dYdata
=
dY
->
template
data
<
T
>();
auto
*
Xdata
=
X
->
template
data
<
T
>();
auto
*
label_data
=
label
->
data
<
int
>
();
auto
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
dy_data
=
dy
->
data
<
T
>
();
auto
*
x_data
=
x
->
data
<
T
>
();
int
N
=
X
->
dims
()[
0
];
int
D
=
X
->
dims
()[
1
];
int
n
=
x
->
dims
()[
0
];
int
d
=
x
->
dims
()[
1
];
int
block
=
512
;
int
grid
=
(
N
*
D
+
block
-
1
)
/
block
;
zero
<
T
><<<
grid
,
block
>>>
(
dXdata
,
N
*
D
);
grid
=
(
N
+
block
-
1
)
/
block
;
int
grid
=
(
n
*
d
+
block
-
1
)
/
block
;
zero
<
T
><<<
grid
,
block
>>>
(
dx_data
,
n
*
d
);
grid
=
(
n
+
block
-
1
)
/
block
;
// TODO(qingqing): launch kernel on specified stream
// base on ExecutionContext.
CrossEntropyGradientKernel
<
T
><<<
grid
,
block
>>>
(
dXdata
,
dYdata
,
Xdata
,
label_data
,
N
,
D
);
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
auto
*
label_data
=
label
->
data
<
T
>
();
SoftCrossEntropyGradientKernel
<
T
><<<
grid
,
block
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
n
,
d
);
}
else
{
auto
*
label_data
=
label
->
data
<
int
>
();
CrossEntropyGradientKernel
<
T
><<<
grid
,
block
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
n
,
d
);
}
}
};
...
...
@@ -127,7 +153,6 @@ class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOpCUDAKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOpCUDAKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpCUDAKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOpCUDAKernel
<
float
>
);
paddle/operators/
onehot_
cross_entropy_op.h
→
paddle/operators/cross_entropy_op.h
浏览文件 @
37faf495
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/platform/hostdevice.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -21,75 +22,93 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
>
inline
T
tolerable_value
(
const
T
x
)
{
static_assert
(
std
::
is_floating_point
<
T
>::
value
,
"tolerable_value works only on float, "
"double and double double."
);
HOSTDEVICE
T
tolerable_value
(
const
T
x
)
{
PADDLE_ASSERT
(
std
::
is_floating_point
<
T
>::
value
);
const
T
kApproInf
=
1e20
;
if
(
x
==
INFINITY
)
{
return
kApproInf
;
}
if
(
x
==
-
INFINITY
)
{
return
-
kApproInf
;
}
return
x
;
}
template
<
typename
T
>
class
Onehot
CrossEntropyOpKernel
:
public
framework
::
OpKernel
{
class
CrossEntropyOpKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
T
*
Xdata
=
X
->
data
<
T
>
();
const
int
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"label"
)
->
data
<
int
>
();
auto
Y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
Y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
Ydata
=
Y
->
data
<
T
>
();
int
batch_size
=
X
->
dims
()[
0
];
int
class_num
=
X
->
dims
()[
1
];
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
int
index
=
i
*
class_num
+
label_data
[
i
];
Ydata
[
i
]
=
-
tolerable_value
(
std
::
log
(
Xdata
[
index
]));
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
auto
*
x_data
=
x
->
data
<
T
>
();
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
y_data
=
y
->
data
<
T
>
();
int
batch_size
=
x
->
dims
()[
0
];
int
class_num
=
x
->
dims
()[
1
];
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
auto
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"Label"
)
->
data
<
T
>
();
int
index
=
0
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
T
sum
=
static_cast
<
T
>
(
0
);
for
(
int
j
=
0
;
j
<
class_num
;
++
j
)
{
sum
+=
label_data
[
index
]
*
tolerable_value
(
std
::
log
(
x_data
[
index
]));
y_data
[
i
]
=
-
sum
;
index
++
;
}
}
}
else
{
auto
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"Label"
)
->
data
<
int
>
();
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
int
index
=
i
*
class_num
+
label_data
[
i
];
y_data
[
i
]
=
-
tolerable_value
(
std
::
log
(
x_data
[
index
]));
}
}
}
};
template
<
typename
T
>
class
Onehot
CrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
{
class
CrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
d
Y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
label
=
ctx
.
Input
<
Tensor
>
(
"
l
abel"
);
auto
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
d
x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
d
y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
label
=
ctx
.
Input
<
Tensor
>
(
"
L
abel"
);
auto
*
dXdata
=
dX
->
template
mutable_data
<
T
>(
ctx
.
GetPlace
());
auto
*
dYdata
=
dY
->
template
data
<
T
>();
auto
*
Xdata
=
X
->
template
data
<
T
>();
auto
*
label_data
=
label
->
data
<
int
>
();
auto
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
dy_data
=
dy
->
data
<
T
>
();
auto
*
x_data
=
x
->
data
<
T
>
();
const
int
batch_size
=
X
->
dims
()[
0
];
const
int
class_num
=
X
->
dims
()[
1
];
int
batch_size
=
x
->
dims
()[
0
];
int
class_num
=
x
->
dims
()[
1
];
// TODO(qingqing): make zero setting an common function.
memset
(
dXdata
,
0
,
sizeof
(
T
)
*
batch_size
*
class_num
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
int
index
=
i
*
class_num
+
label_data
[
i
];
dXdata
[
index
]
=
-
tolerable_value
(
dYdata
[
i
]
/
Xdata
[
index
]);
if
(
ctx
.
Attr
<
int
>
(
"soft_label"
)
==
1
)
{
auto
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"Label"
)
->
data
<
T
>
();
int
index
=
0
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
j
=
0
;
j
<
class_num
;
++
j
)
{
dx_data
[
index
]
=
-
label_data
[
index
]
*
dy_data
[
i
]
/
x_data
[
index
];
index
++
;
}
}
}
else
{
auto
*
label_data
=
label
->
data
<
int
>
();
memset
(
dx_data
,
0
,
sizeof
(
T
)
*
batch_size
*
class_num
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
PADDLE_ASSERT
(
label_data
[
i
]
>=
0
||
label_data
[
i
]
<
class_num
);
int
index
=
i
*
class_num
+
label_data
[
i
];
dx_data
[
index
]
=
-
dy_data
[
i
]
/
x_data
[
index
];
}
}
}
};
...
...
paddle/operators/onehot_cross_entropy_op.cc
已删除
100644 → 0
浏览文件 @
e635e3fd
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/onehot_cross_entropy_op.h"
namespace
paddle
{
namespace
operators
{
class
OnehotCrossEntropyOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"X"
),
"Input(X) of OnehotCrossEntropyOp should not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"label"
),
"Input(label) of OnehotCrossEntropyOp should not be null."
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
OutputVar
(
"Y"
),
"Output(Y) of OnehotCrossEntropyOp should not be null."
);
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"label"
);
PADDLE_ENFORCE_EQ
(
X
->
dims
().
size
(),
2
,
"X's dimension must be 2."
);
PADDLE_ENFORCE_EQ
(
label
->
dims
().
size
(),
1
,
"label's dimension must be 1."
);
PADDLE_ENFORCE_EQ
(
X
->
dims
()[
0
],
label
->
dims
()[
0
]);
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Y"
)
->
Resize
({
X
->
dims
()[
0
],
1
});
}
};
class
OnehotCrossEntropyGradientOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
auto
dX
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
dX
->
Resize
(
X
->
dims
());
}
};
class
OnehotCrossEntropyOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
OnehotCrossEntropyOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The first input of OnehotCrossEntropyOp"
);
AddInput
(
"label"
,
"The second input of OnehotCrossEntropyOp"
);
AddOutput
(
"Y"
,
"The output of OnehotCrossEntropyOp"
);
AddComment
(
R"DOC(
OnehotCrossEntropy Operator.
Y[i] = -log(X[i][j])
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOp
,
ops
::
OnehotCrossEntropyOpMaker
,
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOp
);
REGISTER_OP_CPU_KERNEL
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOpKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOpKernel
<
float
>
);
paddle/operators/prelu_op.h
浏览文件 @
37faf495
...
...
@@ -54,8 +54,9 @@ class PReluKernel : public framework::OpKernel {
int
numel
=
x
->
numel
();
Transform
(
context
.
device_context
(),
x_ptr
,
x_ptr
+
numel
,
o_ptr
,
PReluFunctor
<
T
>
(
alpha_ptr
));
Transform
<
Place
>
trans
;
trans
(
context
.
device_context
(),
x_ptr
,
x_ptr
+
numel
,
o_ptr
,
PReluFunctor
<
T
>
(
alpha_ptr
));
}
};
...
...
@@ -91,8 +92,9 @@ class PReluGradKernel : public framework::OpKernel {
const
T
*
out_ptr
=
out
->
data
<
T
>
();
int
numel
=
dx
->
numel
();
Transform
(
context
.
device_context
(),
out_ptr
,
out_ptr
+
numel
,
dout_ptr
,
dx_ptr
,
PReluGradFunctor
<
T
>
(
alpha_ptr
));
Transform
<
Place
>
trans
;
trans
(
context
.
device_context
(),
out_ptr
,
out_ptr
+
numel
,
dout_ptr
,
dx_ptr
,
PReluGradFunctor
<
T
>
(
alpha_ptr
));
// TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready
}
...
...
paddle/platform/transform.h
浏览文件 @
37faf495
...
...
@@ -29,45 +29,71 @@
namespace
paddle
{
namespace
platform
{
// Transform on host or device. It provides the same API in std library.
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
template
<
typename
Place
>
struct
Transform
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
);
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
);
};
template
<
>
struct
Transform
<
platform
::
CPUPlace
>
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_cpu_place
(
place
),
"It must use CPU place."
);
std
::
transform
(
first
,
last
,
result
,
op
);
}
else
{
#ifdef __NVCC__
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
first
),
DevPtrCast
(
last
),
DevPtrCast
(
result
),
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
}
}
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()
(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_cpu_place
(
place
),
"It must use CPU place."
);
std
::
transform
(
first1
,
last1
,
first2
,
result
,
op
);
}
else
{
}
};
#ifdef __NVCC__
template
<
>
struct
Transform
<
platform
::
GPUPlace
>
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_gpu_place
(
place
),
"It must use GPU place."
);
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
first1
),
DevPtrCast
(
last1
),
DevPtrCast
(
first2
),
DevPtrCast
(
result
),
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
details
::
DevPtrCast
(
first
),
details
::
DevPtrCast
(
last
),
details
::
DevPtrCast
(
result
),
op
);
}
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_gpu_place
(
place
),
"It must use GPU place."
);
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
details
::
DevPtrCast
(
first1
),
details
::
DevPtrCast
(
last1
),
details
::
DevPtrCast
(
first2
),
details
::
DevPtrCast
(
result
),
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
}
};
#endif
}
// namespace platform
}
// namespace paddle
paddle/platform/transform_test.cu
浏览文件 @
37faf495
...
...
@@ -15,6 +15,7 @@
#include <gtest/gtest.h>
#include "paddle/memory/memcpy.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/hostdevice.h"
#include "paddle/platform/transform.h"
template
<
typename
T
>
...
...
@@ -38,7 +39,8 @@ TEST(Transform, CPUUnary) {
using
namespace
paddle
::
platform
;
CPUDeviceContext
ctx
;
float
buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
Transform
(
ctx
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
Transform
<
paddle
::
platform
::
CPUPlace
>
trans
;
trans
(
ctx
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_NEAR
(
buf
[
i
],
static_cast
<
float
>
(
i
+
1
),
1e-5
);
}
...
...
@@ -52,7 +54,8 @@ TEST(Transform, GPUUnary) {
float
cpu_buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
float
*
gpu_buf
=
static_cast
<
float
*>
(
Alloc
(
gpu0
,
sizeof
(
float
)
*
4
));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
cpu_buf
,
sizeof
(
cpu_buf
));
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
Transform
<
paddle
::
platform
::
GPUPlace
>
trans
;
trans
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
ctx
.
Wait
();
Copy
(
CPUPlace
(),
cpu_buf
,
gpu0
,
gpu_buf
,
sizeof
(
cpu_buf
));
Free
(
gpu0
,
gpu_buf
);
...
...
@@ -65,7 +68,9 @@ TEST(Transform, CPUBinary) {
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
memory
;
int
buf
[
4
]
=
{
1
,
2
,
3
,
4
};
Transform
(
CPUDeviceContext
(),
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
Transform
<
paddle
::
platform
::
CPUPlace
>
trans
;
CPUDeviceContext
ctx
;
trans
(
ctx
,
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_EQ
((
i
+
1
)
*
(
i
+
1
),
buf
[
i
]);
}
...
...
@@ -79,11 +84,12 @@ TEST(Transform, GPUBinary) {
CUDADeviceContext
ctx
(
gpu0
);
int
*
gpu_buf
=
static_cast
<
int
*>
(
Alloc
(
gpu0
,
sizeof
(
buf
)));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
buf
,
sizeof
(
buf
));
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
Transform
<
paddle
::
platform
::
GPUPlace
>
trans
;
trans
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
ctx
.
Wait
();
Copy
(
CPUPlace
(),
buf
,
gpu0
,
gpu_buf
,
sizeof
(
buf
));
Free
(
gpu0
,
gpu_buf
);
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_EQ
((
i
+
1
)
*
(
i
+
1
),
buf
[
i
]);
}
}
\ No newline at end of file
}
paddle/pserver/CMakeLists.txt
浏览文件 @
37faf495
...
...
@@ -45,14 +45,18 @@ add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies})
set
(
PSERVER_MAIN_SOURCES
ParameterServer2Main.cpp
)
add_executable
(
paddle_pserver_main
${
PSERVER_MAIN_SOURCES
}
)
link_paddle_exe
(
paddle_pserver_main
)
if
(
WITH_TESTING
)
add_subdirectory
(
test
)
endif
()
install
(
TARGETS paddle_pserver_main
RUNTIME DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
set_target_properties
(
paddle_pserver_main PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
if
(
NOT WITH_C_API
)
add_executable
(
paddle_pserver_main
${
PSERVER_MAIN_SOURCES
}
)
link_paddle_exe
(
paddle_pserver_main
)
install
(
TARGETS paddle_pserver_main
RUNTIME DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
set_target_properties
(
paddle_pserver_main PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
endif
()
paddle/scripts/travis/build_ios.sh
0 → 100755
浏览文件 @
37faf495
#!/bin/bash
set
-e
# Create the build directory for CMake.
mkdir
-p
$TRAVIS_BUILD_DIR
/build_ios
cd
$TRAVIS_BUILD_DIR
/build_ios
# Compile paddle binaries
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DCMAKE_OSX_ARCHITECTURES
=
"arm64"
\
-DWITH_C_API
=
ON
\
-DUSE_EIGEN_FOR_BLAS
=
ON
\
-DWITH_TESTING
=
OFF
\
-DWITH_SWIG_PY
=
OFF
\
-DWITH_STYLE_CHECK
=
OFF
\
-DCMAKE_BUILD_TYPE
=
Release
\
..
make
-j
2
paddle/scripts/travis/check_style.sh
浏览文件 @
37faf495
...
...
@@ -8,6 +8,12 @@ function abort(){
trap
'abort'
0
set
-e
# install glide
curl https://glide.sh/get | bash
eval
"
$(
GIMME_GO_VERSION
=
1.8.3 gimme
)
"
go get
-u
github.com/alecthomas/gometalinter
gometalinter
--install
cd
$TRAVIS_BUILD_DIR
export
PATH
=
/usr/bin:
$PATH
pre-commit
install
...
...
paddle/trainer/CMakeLists.txt
浏览文件 @
37faf495
...
...
@@ -50,22 +50,22 @@ macro(add_paddle_exe TARGET_NAME)
link_paddle_exe
(
${
TARGET_NAME
}
)
endmacro
()
add_paddle_exe
(
paddle_trainer
TrainerMain.cpp
)
add_paddle_exe
(
paddle_merge_model
MergeModel.cpp
)
if
(
WITH_TESTING
)
add_subdirectory
(
tests
)
add_subdirectory
(
tests
)
endif
()
install
(
TARGETS paddle_trainer paddle_merge_model
RUNTIME DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
set_target_properties
(
paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
set_target_properties
(
paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
if
(
NOT WITH_C_API
)
add_paddle_exe
(
paddle_trainer TrainerMain.cpp
)
add_paddle_exe
(
paddle_merge_model MergeModel.cpp
)
install
(
TARGETS paddle_trainer paddle_merge_model
RUNTIME DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
set_target_properties
(
paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
set_target_properties
(
paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE
)
endif
()
if
(
APPLE
)
set
(
CMAKE_EXE_LINKER_FLAGS
"-framework CoreFoundation -framework Security"
)
...
...
@@ -73,6 +73,8 @@ endif()
if
(
WITH_GOLANG
)
add_dependencies
(
paddle_trainer_lib paddle_pserver_cclient
)
target_link_libraries
(
paddle_trainer paddle_pserver_cclient
)
target_link_libraries
(
paddle_trainer_lib paddle_pserver_cclient
)
if
(
NOT WITH_C_API
)
target_link_libraries
(
paddle_trainer paddle_pserver_cclient
)
endif
()
endif
(
WITH_GOLANG
)
paddle/utils/Excepts.h
浏览文件 @
37faf495
...
...
@@ -17,7 +17,8 @@ limitations under the License. */
#include <fenv.h>
#if defined(__APPLE__) || defined(__OSX__)
#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \
!defined(__aarch64__)
int
fegetexcept
(
void
);
int
feenableexcept
(
unsigned
int
excepts
);
...
...
paddle/utils/arch/linux/Locks.cpp
浏览文件 @
37faf495
...
...
@@ -40,6 +40,8 @@ void Semaphore::wait() { sem_wait(&m->sem); }
void
Semaphore
::
post
()
{
sem_post
(
&
m
->
sem
);
}
/// SpinLockPrivate
#ifdef PADDLE_USE_PTHREAD_SPINLOCK
class
SpinLockPrivate
{
...
...
@@ -79,6 +81,8 @@ SpinLock::~SpinLock() { delete m; }
void
SpinLock
::
lock
()
{
m
->
lock
();
}
void
SpinLock
::
unlock
()
{
m
->
unlock
();
}
/// ThreadBarrierPrivate
#ifdef PADDLE_USE_PTHREAD_BARRIER
class
ThreadBarrierPrivate
{
...
...
@@ -136,6 +140,8 @@ public:
#endif
/// ThreadBarrier
ThreadBarrier
::
ThreadBarrier
(
int
count
)
:
m
(
new
ThreadBarrierPrivate
(
count
))
{}
ThreadBarrier
::~
ThreadBarrier
()
{
delete
m
;
}
void
ThreadBarrier
::
wait
()
{
m
->
wait
();
}
...
...
paddle/utils/arch/osx/Excepts.cpp
浏览文件 @
37faf495
...
...
@@ -14,7 +14,8 @@ limitations under the License. */
#include "paddle/utils/Excepts.h"
#if defined(__APPLE__) || defined(__OSX__)
#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \
!defined(__aarch64__)
int
fegetexcept
(
void
)
{
static
fenv_t
fenv
;
...
...
python/paddle/v2/framework/tests/test_cross_entropy_op.py
0 → 100644
浏览文件 @
37faf495
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class
TestCrossEntropyOp1
(
OpTest
):
"""Test standard cross-entropy, with index representation of labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
batch_size
=
30
class_num
=
10
X
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
batch_size
,
class_num
]).
astype
(
"float32"
)
label
=
np
.
random
.
randint
(
0
,
class_num
,
(
batch_size
,
1
),
dtype
=
"int32"
)
cross_entropy
=
np
.
asmatrix
(
[[
-
np
.
log
(
X
[
i
][
label
[
i
][
0
]])]
for
i
in
range
(
X
.
shape
[
0
])],
dtype
=
"float32"
)
self
.
inputs
=
{
"X"
:
X
,
"Label"
:
label
}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
self
.
attrs
=
{
'soft_label'
:
0
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Y"
)
class
TestCrossEntropyOp2
(
OpTest
):
"""Test soft-label cross-entropy, with vecterized soft labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
batch_size
=
10
class_num
=
5
X
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
batch_size
,
class_num
]).
astype
(
"float32"
)
label
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
batch_size
,
class_num
]).
astype
(
"float32"
)
label
/=
label
.
sum
(
axis
=
1
,
keepdims
=
True
)
cross_entropy
=
(
-
label
*
np
.
log
(
X
)).
sum
(
axis
=
1
,
keepdims
=
True
).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
X
,
'Label'
:
label
}
self
.
outputs
=
{
'Y'
:
cross_entropy
}
self
.
attrs
=
{
'soft_label'
:
1
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Y'
)
class
TestCrossEntropyOp3
(
OpTest
):
"""Test one-hot cross-entropy, with vecterized one-hot representation of
labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
batch_size
=
30
class_num
=
10
X
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
batch_size
,
class_num
]).
astype
(
"float32"
)
label_index
=
np
.
random
.
randint
(
0
,
class_num
,
(
batch_size
),
dtype
=
"int32"
)
label
=
np
.
zeros
(
X
.
shape
)
label
[
np
.
arange
(
batch_size
),
label_index
]
=
1
cross_entropy
=
np
.
asmatrix
(
[[
-
np
.
log
(
X
[
i
][
label_index
[
i
]])]
for
i
in
range
(
X
.
shape
[
0
])],
dtype
=
"float32"
)
cross_entropy2
=
(
-
label
*
np
.
log
(
X
)).
sum
(
axis
=
1
,
keepdims
=
True
).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
X
,
'Label'
:
label
}
self
.
outputs
=
{
'Y'
:
cross_entropy
}
self
.
attrs
=
{
'soft_label'
:
1
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Y'
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_mnist.py
浏览文件 @
37faf495
...
...
@@ -128,7 +128,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
def
cross_entropy_layer
(
net
,
input
,
label
):
cost_name
=
"cross_entropy_%d"
%
uniq_id
()
cross_entropy_op
=
Operator
(
"
onehot_cross_entropy"
,
X
=
input
,
l
abel
=
label
,
Y
=
cost_name
)
"
cross_entropy"
,
X
=
input
,
L
abel
=
label
,
Y
=
cost_name
)
net
.
append_op
(
cross_entropy_op
)
scope
.
new_var
(
cost_name
)
net
.
infer_shape
(
scope
)
...
...
@@ -181,7 +181,7 @@ def error_rate(predict, label):
images
=
data_layer
(
name
=
"pixel"
,
dims
=
[
BATCH_SIZE
,
784
])
labels
=
data_layer
(
name
=
"label"
,
dims
=
[
BATCH_SIZE
])
labels
=
data_layer
(
name
=
"label"
,
dims
=
[
BATCH_SIZE
,
1
])
fc1
=
fc_layer
(
net
=
forward_net
,
input
=
images
,
size
=
100
,
act
=
"sigmoid"
)
fc2
=
fc_layer
(
net
=
forward_net
,
input
=
fc1
,
size
=
100
,
act
=
"sigmoid"
)
predict
=
fc_layer
(
net
=
forward_net
,
input
=
fc2
,
size
=
10
,
act
=
"softmax"
)
...
...
@@ -215,6 +215,7 @@ def test(cost_name):
for
data
in
test_reader
():
image_data
=
numpy
.
array
(
map
(
lambda
x
:
x
[
0
],
data
)).
astype
(
"float32"
)
label_data
=
numpy
.
array
(
map
(
lambda
x
:
x
[
1
],
data
)).
astype
(
"int32"
)
label_data
=
numpy
.
expand_dims
(
label_data
,
axis
=
1
)
feed_data
(
images
,
image_data
)
feed_data
(
labels
,
label_data
)
...
...
@@ -235,6 +236,7 @@ for pass_id in range(PASS_NUM):
for
data
in
train_reader
():
image_data
=
numpy
.
array
(
map
(
lambda
x
:
x
[
0
],
data
)).
astype
(
"float32"
)
label_data
=
numpy
.
array
(
map
(
lambda
x
:
x
[
1
],
data
)).
astype
(
"int32"
)
label_data
=
numpy
.
expand_dims
(
label_data
,
axis
=
1
)
feed_data
(
images
,
image_data
)
feed_data
(
labels
,
label_data
)
...
...
python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py
已删除
100644 → 0
浏览文件 @
e635e3fd
import
unittest
import
numpy
from
op_test
import
OpTest
class
TestOnehotCrossEntropyOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"onehot_cross_entropy"
batch_size
=
30
class_num
=
10
X
=
numpy
.
random
.
uniform
(
0.1
,
1.0
,
[
batch_size
,
class_num
]).
astype
(
"float32"
)
labels
=
numpy
.
random
.
randint
(
0
,
class_num
,
batch_size
,
dtype
=
"int32"
)
cross_entropy
=
numpy
.
asmatrix
(
[[
-
numpy
.
log
(
X
[
i
][
labels
[
i
]])]
for
i
in
range
(
X
.
shape
[
0
])],
dtype
=
"float32"
)
self
.
inputs
=
{
"X"
:
X
,
"label"
:
labels
}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Y"
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_prelu_op.py
浏览文件 @
37faf495
...
...
@@ -17,10 +17,10 @@ class PReluTest(OpTest):
assert
out_np
is
not
self
.
inputs
[
'X'
]
self
.
outputs
=
{
'Out'
:
out_np
}
def
not_
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
def
not_
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录