Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
94e86897
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
94e86897
编写于
11月 17, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
差异文件
update code and fix conflicts.
上级
082bc7af
3375e3e2
变更
64
显示空白变更内容
内联
并排
Showing
64 changed file
with
753 addition
and
494 deletion
+753
-494
CMakeLists.txt
CMakeLists.txt
+19
-7
cmake/configure.cmake
cmake/configure.cmake
+8
-21
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+3
-5
cmake/cuda.cmake
cmake/cuda.cmake
+0
-1
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+7
-7
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+7
-8
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+4
-0
cmake/util.cmake
cmake/util.cmake
+2
-2
doc/design/mkldnn/README.MD
doc/design/mkldnn/README.MD
+4
-4
doc/howto/dev/write_docs_cn.rst
doc/howto/dev/write_docs_cn.rst
+1
-1
doc/mobile/cross_compiling_for_android_cn.md
doc/mobile/cross_compiling_for_android_cn.md
+1
-1
doc/mobile/cross_compiling_for_ios_cn.md
doc/mobile/cross_compiling_for_ios_cn.md
+6
-6
doc/mobile/cross_compiling_for_raspberry_cn.md
doc/mobile/cross_compiling_for_raspberry_cn.md
+1
-1
paddle/cuda/include/hl_gpu.h
paddle/cuda/include/hl_gpu.h
+2
-0
paddle/framework/backward.cc
paddle/framework/backward.cc
+49
-19
paddle/framework/data_type.h
paddle/framework/data_type.h
+2
-0
paddle/framework/ddim.cc
paddle/framework/ddim.cc
+1
-2
paddle/framework/executor.cc
paddle/framework/executor.cc
+1
-0
paddle/framework/op_desc.cc
paddle/framework/op_desc.cc
+23
-1
paddle/framework/op_desc.h
paddle/framework/op_desc.h
+4
-0
paddle/framework/operator.cc
paddle/framework/operator.cc
+0
-13
paddle/framework/scope.cc
paddle/framework/scope.cc
+2
-1
paddle/framework/shape_inference.h
paddle/framework/shape_inference.h
+4
-3
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+1
-1
paddle/math/Storage.cpp
paddle/math/Storage.cpp
+4
-0
paddle/operators/array_operator.h
paddle/operators/array_operator.h
+1
-0
paddle/operators/bilinear_tensor_product_op.h
paddle/operators/bilinear_tensor_product_op.h
+1
-1
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+1
-6
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+2
-4
paddle/operators/cos_sim_op.h
paddle/operators/cos_sim_op.h
+1
-1
paddle/operators/detail/safe_ref.h
paddle/operators/detail/safe_ref.h
+31
-0
paddle/operators/fill_constant_batch_size_like_op.cc
paddle/operators/fill_constant_batch_size_like_op.cc
+4
-1
paddle/operators/fill_constant_batch_size_like_op.cu.cc
paddle/operators/fill_constant_batch_size_like_op.cu.cc
+4
-1
paddle/operators/fill_zeros_like_op.cc
paddle/operators/fill_zeros_like_op.cc
+5
-2
paddle/operators/fill_zeros_like_op.cu.cc
paddle/operators/fill_zeros_like_op.cu.cc
+5
-2
paddle/operators/is_empty_op.cc
paddle/operators/is_empty_op.cc
+67
-0
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+2
-2
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+2
-2
paddle/operators/math/math_function.cc
paddle/operators/math/math_function.cc
+2
-0
paddle/operators/math/math_function.cu
paddle/operators/math/math_function.cu
+2
-0
paddle/operators/sum_op.cc
paddle/operators/sum_op.cc
+29
-8
paddle/operators/tensor_array_read_write_op.cc
paddle/operators/tensor_array_read_write_op.cc
+15
-9
paddle/operators/while_op.cc
paddle/operators/while_op.cc
+153
-31
paddle/scripts/docker/README.md
paddle/scripts/docker/README.md
+1
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-6
paddle/scripts/submit_local.sh.in
paddle/scripts/submit_local.sh.in
+5
-5
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+1
-1
paddle/trainer/Trainer.cpp
paddle/trainer/Trainer.cpp
+4
-0
python/paddle/v2/fluid/framework.py
python/paddle/v2/fluid/framework.py
+27
-8
python/paddle/v2/fluid/net_drawer.py
python/paddle/v2/fluid/net_drawer.py
+4
-0
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+9
-20
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
...le/v2/fluid/tests/book/test_image_classification_train.py
+20
-80
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
.../paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+9
-20
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
...n/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+10
-25
python/paddle/v2/fluid/tests/book/test_recommender_system.py
python/paddle/v2/fluid/tests/book/test_recommender_system.py
+27
-72
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
...dle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+5
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
...luid/tests/book/test_understand_sentiment_dynamic_lstm.py
+4
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
...dle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+4
-5
python/paddle/v2/fluid/tests/book/test_word2vec.py
python/paddle/v2/fluid/tests/book/test_word2vec.py
+13
-36
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+28
-12
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+21
-5
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+23
-8
python/paddle/v2/fluid/tests/test_is_empty_op.py
python/paddle/v2/fluid/tests/test_is_empty_op.py
+43
-0
python/paddle/v2/fluid/tests/test_while_op.py
python/paddle/v2/fluid/tests/test_while_op.py
+10
-3
未找到文件。
CMakeLists.txt
浏览文件 @
94e86897
...
...
@@ -36,8 +36,7 @@ include(simd)
################################ Configurations #######################################
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKLDNN
"Compile PaddlePaddle with mkl-dnn support."
${
AVX_FOUND
}
)
option
(
WITH_MKLML
"Compile PaddlePaddle with mklml package."
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
ON
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
...
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
"Disable PYTHON when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS"
FORCE
)
# Compile PaddlePaddle mobile inference library
if
(
NOT WITH_C_API
)
...
...
@@ -111,6 +108,17 @@ else()
set
(
THIRD_PARTY_BUILD_TYPE Release
)
endif
()
if
(
WITH_MKL
)
set
(
WITH_MKLML ON
)
set
(
WITH_MKLDNN
${
AVX2_FOUND
}
)
if
(
NOT WITH_MKLDNN
)
message
(
WARNING
"Do not have AVX2 intrinsics and disabled MKL-DNN"
)
endif
()
else
()
set
(
WITH_MKLML OFF
)
set
(
WITH_MKLDNN OFF
)
endif
()
########################################################################################
include
(
external/mklml
)
# download mklml package
...
...
@@ -161,8 +169,12 @@ if(WITH_GPU)
include
(
cuda
)
endif
(
WITH_GPU
)
if
(
WITH_MKLML
)
list
(
APPEND EXTERNAL_LIBS
${
MKLML_IOMP_LIB
}
)
endif
()
if
(
WITH_MKLDNN
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
${
MKLDNN_IOMP_LIB
}
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
)
endif
()
if
(
USE_NNPACK
)
...
...
cmake/configure.cmake
浏览文件 @
94e86897
...
...
@@ -76,27 +76,14 @@ else()
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
endif
(
NOT WITH_GPU
)
if
(
WITH_MKLDNN
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
if
(
WITH_MKLML AND MKLDNN_IOMP_DIR
)
message
(
STATUS
"Enable Intel OpenMP at
${
MKLDNN_IOMP_DIR
}
"
)
if
(
WITH_MKLML AND MKLML_IOMP_LIB
)
message
(
STATUS
"Enable Intel OpenMP with
${
MKLML_IOMP_LIB
}
"
)
set
(
OPENMP_FLAGS
"-fopenmp"
)
set
(
CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENMP_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENMP_FLAGS
}
"
)
else
()
find_package
(
OpenMP
)
if
(
OPENMP_FOUND
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OpenMP_C_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OpenMP_CXX_FLAGS
}
"
)
else
()
message
(
WARNING
"Can not find OpenMP."
"Some performance features in MKLDNN may not be available"
)
endif
()
endif
()
endif
(
WITH_MKLDNN
)
endif
()
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
SIMD_FLAG
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
SIMD_FLAG
}
"
)
...
...
cmake/cross_compiling/ios.cmake
浏览文件 @
94e86897
...
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"arm64"
)
set
(
IOS_ARCH
"armv7;armv7s;arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
# FIXME(liuyiqun): support "i386;x86_64" future
set
(
IOS_ARCH
"x86_64"
)
set
(
IOS_ARCH
"i386;x86_64"
)
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
...
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibility is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility
=hidden -fvisibility
-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
...
...
cmake/cuda.cmake
浏览文件 @
94e86897
...
...
@@ -63,7 +63,6 @@ function(select_nvcc_arch_flags out_variable)
set
(
archs_name_default
"All"
)
if
(
NOT CMAKE_CROSSCOMPILING
)
list
(
APPEND archs_names
"Auto"
)
set
(
archs_name_default
"Auto"
)
endif
()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
...
...
cmake/external/mkldnn.cmake
浏览文件 @
94e86897
...
...
@@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_MKLROOT
${
MKLML_ROOT
}
)
SET
(
MKLDNN_IOMP_LIB
${
MKLML_IOMP_LIB
}
)
SET
(
MKLDNN_IOMP_DIR
${
MKLML_LIB_DIR
}
)
MESSAGE
(
STATUS
"Build MKLDNN with
${
MKLDNN_MKLROOT
}
"
)
MESSAGE
(
STATUS
"Build MKLDNN with MKLML
${
MKLML_ROOT
}
"
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
ENDIF
()
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
-Wno-error=strict-overflow"
)
...
...
@@ -57,15 +56,16 @@ ExternalProject_Add(
PREFIX
${
MKLDNN_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DMKLROOT=
${
MKL
DNN_MKL
ROOT
}
CMAKE_ARGS -DMKLROOT=
${
MKL
ML_
ROOT
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKL
DNN_MKL
ROOT
}
-DMKLROOT:PATH=
${
MKL
ML_
ROOT
}
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
MESSAGE
(
STATUS
"Mkldnn library:
${
MKLDNN_LIB
}
"
)
MESSAGE
(
STATUS
"MKLDNN library:
${
MKLDNN_LIB
}
"
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
LIST
(
APPEND external_project_dependencies mkldnn
)
cmake/external/openblas.cmake
浏览文件 @
94e86897
...
...
@@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND})
"
${
CBLAS_INSTALL_DIR
}
/lib/
${
CMAKE_STATIC_LIBRARY_PREFIX
}
openblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"openblas library."
FORCE
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
"
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
-Wno-unused-but-set-variable -Wno-unused-variable
"
)
IF
(
CMAKE_CROSSCOMPILING
)
SET
(
OPTIONAL_ARGS HOSTCC=
${
HOST_C_COMPILER
}
)
...
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead."
)
ENDIF
()
ELSEIF
(
RPI
)
# use hardfp
...
...
cmake/external/warpctc.cmake
浏览文件 @
94e86897
...
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
IF
(
MOBILE_INFERENCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
...
...
cmake/util.cmake
浏览文件 @
94e86897
...
...
@@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
target_link_libraries
(
${
TARGET_NAME
}
log
)
endif
(
ANDROID
)
if
(
WITH_MKL
DNN AND WITH_MKLML AND MKLDNN_IOMP_DIR
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
DNN_IOMP
_DIR
}
-liomp5 -Wl,--as-needed"
)
if
(
WITH_MKL
ML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
ML_LIB
_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
()
add_dependencies
(
${
TARGET_NAME
}
${
external_project_dependencies
}
)
...
...
doc/design/mkldnn/README.MD
浏览文件 @
94e86897
...
...
@@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
我们把集成方案大致分为了如下几个方面。
### CMake
我们会在
`CMakeLists.txt`
中
添加
`WITH_MKLDNN`
的选项,当设置这个值为
`ON`
的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能
。
我们会在
`CMakeLists.txt`
中
给用户添加一个
`WITH_MKL`
的开关,它是负责
`WITH_MKLML`
和
`WITH_MKLDNN`
的总开关
。
同时,我们会引入
`WITH_MKLML`
选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性
能。
当打开
`WITH_MKL`
时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功
能。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
当关闭
`WITH_MKL`
时,MKLML和MKL-DNN功能会同时关闭
。
**备注**
:当
`WITH_MKLML=ON`
的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动
`cmake/cblas.cmake`
中的逻辑
。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
### Layers
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
...
...
doc/howto/dev/write_docs_cn.rst
浏览文件 @
94e86897
...
...
@@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。
cd TO_YOUR_PADDLE_CLONE_PATH
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL
DNN=OFF -DWITH_MKLML
=OFF -DWITH_DOC=ON
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make gen_proto_py
make paddle_docs paddle_docs_cn
...
...
doc/mobile/cross_compiling_for_android_cn.md
浏览文件 @
94e86897
#
构建Android平台上的PaddlePaddle库
#
Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
-
基于Docker容器的编译方式
...
...
doc/mobile/cross_compiling_for_ios_cn.md
浏览文件 @
94e86897
#
构建iOS平台上的PaddlePaddle库
#
iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
...
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示:
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示
,默认编译所有架构
:
<table class="docutils">
<colgroup>
...
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64
(默认)
</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64
(默认)
</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
...
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```
bash
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_ARCH
=
"arm64"
\
-DIOS_ARCH
=
"arm
v7;arm
64"
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
...
...
@@ -112,6 +112,6 @@ $ make install
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`third_party`
目录,其中包含所依赖的所有第三方库
注意,
不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用
`lipo`
工具将多个静态库合并成一个支持多个架构的
fat库。
注意,
如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用
`lipo`
工具合并
fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
doc/mobile/cross_compiling_for_raspberry_cn.md
浏览文件 @
94e86897
#
构建Raspberry Pi平台上的PaddlePaddle库
#
Raspberry Pi平台编译指南
通常有两个方法来构建基于 Raspberry Pi 的版本:
...
...
paddle/cuda/include/hl_gpu.h
浏览文件 @
94e86897
...
...
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
...
...
paddle/framework/backward.cc
浏览文件 @
94e86897
...
...
@@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector<std::string>& names,
return
false
;
}
}
if
(
VLOG_IS_ON
(
10
))
{
std
::
ostringstream
sout
;
sout
<<
"All input {"
;
for
(
auto
&
name
:
names
)
{
sout
<<
name
<<
","
;
}
sout
<<
"} is in {"
;
for
(
auto
&
name
:
set
)
{
sout
<<
name
<<
","
;
}
sout
<<
"}"
;
VLOG
(
10
)
<<
sout
.
str
();
}
return
true
;
}
...
...
@@ -290,14 +303,12 @@ static void CreateGradVarInBlock(
auto
ops
=
block_desc
->
AllOps
();
for
(
size_t
op_index
=
grad_op_start_index
;
op_index
<
ops
.
size
();
++
op_index
)
{
bool
need_infer_shape
=
false
;
std
::
unordered_set
<
std
::
string
>
new_vars
;
ForEachVarName
(
ops
[
op_index
]
->
Outputs
(),
[
&
](
const
std
::
string
&
grad_var_name
)
{
if
(
block_desc
->
HasVar
(
grad_var_name
))
{
return
false
;
}
need_infer_shape
=
true
;
auto
var
=
block_desc
->
Var
(
grad_var_name
);
new_vars
.
insert
(
var
->
Name
());
auto
it
=
param_name_map
.
find
(
grad_var_name
);
...
...
@@ -311,7 +322,6 @@ static void CreateGradVarInBlock(
grad_record
.
op_idx_
=
static_cast
<
int
>
(
op_index
);
return
false
;
/* not break */
});
if
(
need_infer_shape
)
{
ops
[
op_index
]
->
InferVarType
(
block_desc
);
for
(
auto
&
arg
:
ops
[
op_index
]
->
OutputArgumentNames
())
{
if
(
new_vars
.
find
(
arg
)
==
new_vars
.
end
())
{
...
...
@@ -328,7 +338,6 @@ static void CreateGradVarInBlock(
}
ops
[
op_index
]
->
InferShape
(
*
block_desc
);
}
}
}
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
MakeOpGrad
(
...
...
@@ -387,6 +396,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind
&
program_desc
,
int
block_idx
,
std
::
unordered_set
<
std
::
string
>*
no_grad_vars
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>*
grad_to_var
)
{
VLOG
(
5
)
<<
"MakeBlockBackward"
;
BlockDescBind
*
cur_block
=
program_desc
.
MutableBlock
(
block_idx
);
std
::
vector
<
OpDescBind
*>
op_descs
=
cur_block
->
AllOps
();
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
size_t
>>
dup_out_ops
;
...
...
@@ -394,9 +404,10 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
backward_descs
;
for
(
auto
it
=
op_descs
.
rbegin
();
it
!=
op_descs
.
rend
();
++
it
)
{
VLOG
(
5
)
<<
"Making backward "
<<
(
*
it
)
->
Type
()
<<
" op"
;
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
op_grads
;
if
((
*
it
)
->
Type
()
==
"recurrent"
)
{
if
((
*
it
)
->
Type
()
==
"recurrent"
||
(
*
it
)
->
Type
()
==
"while"
)
{
int
step_block_idx
=
(
*
it
)
->
GetBlockAttr
(
"step_block"
);
BlockDescBind
*
backward_block
=
CreateStepBlock
(
program_desc
,
no_grad_vars
,
grad_to_var
,
step_block_idx
);
...
...
@@ -410,6 +421,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
op_grads
=
MakeOpGrad
(
*
it
,
no_grad_vars
,
grad_to_var
);
}
if
(
VLOG_IS_ON
(
10
))
{
std
::
ostringstream
sout
;
sout
<<
"Made "
;
for
(
auto
&
op_grad
:
op_grads
)
{
sout
<<
op_grad
->
Type
()
<<
" "
;
}
VLOG
(
10
)
<<
sout
.
str
();
}
for
(
const
auto
&
desc
:
op_grads
)
{
for
(
const
std
::
string
&
out_name
:
desc
->
OutputArgumentNames
())
{
if
(
out_name
.
find
(
"@GRAD"
)
==
std
::
string
::
npos
)
{
...
...
@@ -425,6 +445,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
op_grads
.
begin
(),
op_grads
.
end
(),
std
::
back_inserter
(
backward_descs
),
[](
std
::
unique_ptr
<
OpDescBind
>&
ptr
)
{
return
std
::
move
(
ptr
);
});
}
VLOG
(
5
)
<<
"Appending Sums"
;
// Check whether some variables are written more than once
std
::
list
<
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>>
pending_sum_ops
;
for
(
const
auto
&
dup
:
dup_out_ops
)
{
...
...
@@ -432,16 +454,22 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
const
std
::
vector
<
size_t
>
dup_op
=
dup
.
second
;
if
(
out_name
!=
kEmptyVarName
&&
dup_op
.
size
()
>
1
)
{
std
::
vector
<
std
::
string
>
sum_op_inputs
;
std
::
string
next_g_name
=
out_name
;
for
(
size_t
i
=
0
;
i
<
dup_op
.
size
();
++
i
)
{
VLOG
(
10
)
<<
backward_descs
[
dup_op
[
i
]]
->
Type
()
<<
" has "
<<
out_name
<<
" duplicated"
;
std
::
string
new_name
=
out_name
+
"@RENAME@"
+
std
::
to_string
(
i
);
backward_descs
[
dup_op
[
i
]]
->
Rename
(
out_name
,
new_name
);
backward_descs
[
dup_op
[
i
]]
->
RenameOutput
(
out_name
,
new_name
);
backward_descs
[
dup_op
[
i
]]
->
RenameInput
(
out_name
,
next_g_name
);
sum_op_inputs
.
emplace_back
(
new_name
);
next_g_name
=
sum_op_inputs
.
back
();
}
std
::
unique_ptr
<
OpDescBind
>
sum_op
(
new
OpDescBind
(
"sum"
,
{{
"X"
,
sum_op_inputs
}},
{{
"Out"
,
{
out_name
}}},
{}));
pending_sum_ops
.
push_back
({
dup_op
.
back
(),
std
::
move
(
sum_op
)});
}
}
pending_sum_ops
.
sort
(
[](
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
a
,
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
b
)
{
...
...
@@ -452,6 +480,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std
::
move
(
p
.
second
));
}
VLOG
(
5
)
<<
"MakeBlockBackward Finished"
;
return
backward_descs
;
}
...
...
paddle/framework/data_type.h
浏览文件 @
94e86897
...
...
@@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) {
return
DataType
::
INT32
;
}
else
if
(
typeid
(
int64_t
).
hash_code
()
==
type
.
hash_code
())
{
return
DataType
::
INT64
;
}
else
if
(
typeid
(
bool
).
hash_code
()
==
type
.
hash_code
())
{
return
DataType
::
BOOL
;
}
else
{
PADDLE_THROW
(
"Not supported"
);
}
...
...
paddle/framework/ddim.cc
浏览文件 @
94e86897
...
...
@@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) {
ddim
=
make_dim
<
9
>
(
dims
);
break
;
default:
throw
std
::
invalid_argument
(
"Dynamic dimensions must have between [1, 9] dimensions."
);
PADDLE_THROW
(
"Dynamic dimensions must have between [1, 9] dimensions."
);
}
}
...
...
paddle/framework/executor.cc
浏览文件 @
94e86897
...
...
@@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
);
VLOG
(
10
)
<<
op
->
DebugString
();
op
->
Run
(
*
local_scope
,
*
device
);
}
if
(
create_local_scope
)
{
...
...
paddle/framework/op_desc.cc
浏览文件 @
94e86897
...
...
@@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name,
need_update_
=
true
;
}
void
OpDescBind
::
RenameOutput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
)
{
for
(
auto
&
output
:
outputs_
)
{
std
::
replace
(
output
.
second
.
begin
(),
output
.
second
.
end
(),
old_name
,
new_name
);
}
need_update_
=
true
;
}
void
OpDescBind
::
RenameInput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
)
{
for
(
auto
&
input
:
inputs_
)
{
std
::
replace
(
input
.
second
.
begin
(),
input
.
second
.
end
(),
old_name
,
new_name
);
}
need_update_
=
true
;
}
struct
SetAttrDescVisitor
:
public
boost
::
static_visitor
<
void
>
{
explicit
SetAttrDescVisitor
(
OpDesc
::
Attr
*
attr
)
:
attr_
(
attr
)
{}
mutable
OpDesc
::
Attr
*
attr_
;
...
...
@@ -448,7 +465,12 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
try
{
return
framework
::
make_ddim
(
var
->
Shape
());
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
}
void
CompileTimeInferShapeContext
::
SetDim
(
const
std
::
string
&
name
,
...
...
paddle/framework/op_desc.h
浏览文件 @
94e86897
...
...
@@ -73,6 +73,10 @@ class OpDescBind {
void
Rename
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
void
RenameOutput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
void
RenameInput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
// Only be used in C++
const
AttributeMap
&
GetAttrMap
()
const
;
...
...
paddle/framework/operator.cc
浏览文件 @
94e86897
...
...
@@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
OperatorWithKernel
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
if
(
VLOG_IS_ON
(
1
))
{
auto
inputs
=
this
->
InputVars
();
auto
outputs
=
this
->
OutputVars
(
true
);
std
::
ostringstream
sout
;
sout
<<
"Run operator "
<<
this
->
Type
()
<<
" From ["
;
std
::
ostream_iterator
<
std
::
string
>
out_it
(
sout
,
","
);
std
::
copy
(
inputs
.
begin
(),
inputs
.
end
(),
out_it
);
sout
<<
"] to ["
;
std
::
copy
(
outputs
.
begin
(),
outputs
.
end
(),
out_it
);
sout
<<
"]"
;
VLOG
(
1
)
<<
sout
.
str
();
}
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
...
...
paddle/framework/scope.cc
浏览文件 @
94e86897
...
...
@@ -38,11 +38,12 @@ Scope& Scope::NewScope() const {
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
auto
iter
=
vars_
.
find
(
name
);
if
(
iter
!=
vars_
.
end
())
{
VLOG
(
3
)
<<
"Get existing variable "
<<
name
;
return
iter
->
second
;
}
Variable
*
v
=
new
Variable
();
vars_
[
name
]
=
v
;
VLOG
(
3
)
<<
"Create variable "
<<
name
<<
" on scope"
;
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
}
...
...
paddle/framework/shape_inference.h
浏览文件 @
94e86897
...
...
@@ -53,6 +53,10 @@ class InferShapeContext {
virtual
bool
IsRuntime
()
const
=
0
;
// Note: In while op, we need this to be public
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
framework
::
DDim
>
&
dims
);
protected:
virtual
framework
::
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
framework
::
DDim
&
dim
)
=
0
;
...
...
@@ -60,9 +64,6 @@ class InferShapeContext {
std
::
vector
<
framework
::
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
framework
::
DDim
>
&
dims
);
std
::
vector
<
VarDesc
::
VarType
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
...
...
paddle/gserver/layers/MKLDNNLayer.cpp
浏览文件 @
94e86897
...
...
@@ -22,7 +22,7 @@ namespace paddle {
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKL
DNN
=ON "
<<
"Please set WITH_MKL=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
...
...
paddle/math/Storage.cpp
浏览文件 @
94e86897
...
...
@@ -17,9 +17,13 @@ limitations under the License. */
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#ifndef PADDLE_MOBILE_INFERENCE
DEFINE_int32
(
pool_limit_size
,
536870912
,
"maximum memory size managed by a memory pool, default is 512M"
);
#else
DEFINE_int32
(
pool_limit_size
,
0
,
"default is 0"
);
#endif
namespace
paddle
{
...
...
paddle/operators/array_operator.h
浏览文件 @
94e86897
...
...
@@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase {
}
else
{
offset
=
static_cast
<
size_t
>
(
*
i_tensor
.
data
<
int64_t
>
());
}
VLOG
(
10
)
<<
" Offset = "
<<
offset
;
return
offset
;
}
};
...
...
paddle/operators/bilinear_tensor_product_op.h
浏览文件 @
94e86897
...
...
@@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
// Calculate the gradient of Input(Bias).
if
(
d_bias
)
{
d_bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
d_bias_mat
=
EigenMatrix
<
T
>::
From
(
*
d_bias
);
auto
d_bias_mat
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_bias
);
d_bias_mat
.
device
(
place
)
=
d_out_mat
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
}
}
...
...
paddle/operators/conv_transpose_op.cc
浏览文件 @
94e86897
...
...
@@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
for
(
size_t
i
=
0
;
i
<
paddings
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
paddings
[
i
],
0
,
"No Padding allowed in conv transpose op."
);
}
PADDLE_ENFORCE
(
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
,
"ConvTransposeOp intput should be 4-D or 5-D tensor."
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
...
...
@@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
-
2
*
paddings
[
i
]
+
filter_dims
[
i
+
2
]);
}
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
...
...
paddle/operators/conv_transpose_op.h
浏览文件 @
94e86897
...
...
@@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will always be disabled in conv2dtranspose.
...
...
@@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
std
::
vector
<
int
>
{
0
,
0
,
0
},
&
output_batch
);
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
paddings
,
&
output_batch
);
}
}
}
...
...
@@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if
((
!
input_grad
)
&&
(
!
filter_grad
))
return
;
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
paddle/operators/cos_sim_op.h
浏览文件 @
94e86897
...
...
@@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel<T> {
// compute dy
if
(
out_grad_y
)
{
out_grad_y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dy
=
Eigen
Matrix
<
T
>::
Reshape
(
*
out_grad_y
,
1
);
auto
dy
=
Eigen
Vector
<
T
>::
Flatten
(
*
out_grad_y
);
auto
grad
=
x
/
norm_prod_bcast
-
z_bcast
*
y_bcast
/
y_snorm_bcast
;
dy
.
device
(
place
)
=
(
dz_bcast
*
grad
).
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}));
}
...
...
paddle/operators/detail/safe_ref.h
0 → 100644
浏览文件 @
94e86897
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace
paddle
{
namespace
operators
{
namespace
detail
{
/**
 * Turn a pointer into a reference after checking that it is not null.
 *
 * @param ptr  Pointer to dereference.
 * @param args Optional error-message arguments passed on to PADDLE_ENFORCE
 *             together with the null check (a printf-style format string
 *             followed by its values).
 * @return Reference to the object `ptr` points to.
 */
template <typename T, typename... ARGS>
inline T& Ref(T* ptr, ARGS&&... args) {
  PADDLE_ENFORCE(ptr != nullptr, args...);
  return *ptr;
}
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/fill_constant_batch_size_like_op.cc
浏览文件 @
94e86897
...
...
@@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like,
REGISTER_OP_CPU_KERNEL
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
int
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
int64_t
>
);
paddle/operators/fill_constant_batch_size_like_op.cu.cc
浏览文件 @
94e86897
...
...
@@ -19,4 +19,7 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
int
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
int64_t
>
);
paddle/operators/fill_zeros_like_op.cc
浏览文件 @
94e86897
...
...
@@ -54,5 +54,8 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
ops
::
FillZerosLikeOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
bool
>
);
paddle/operators/fill_zeros_like_op.cu.cc
浏览文件 @
94e86897
...
...
@@ -17,5 +17,8 @@
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
bool
>
);
paddle/operators/is_empty_op.cc
0 → 100644
浏览文件 @
94e86897
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace
paddle
{
namespace
operators
{
// Slot names used by the is_empty operator for its input and its output.
constexpr char kInput[] = "X";
constexpr char kOutput[] = "Out";
// Operator that stores, in a one-element boolean tensor, whether the product
// of its input tensor's dimensions is zero.
class IsEmptyOp : public framework::OperatorBase {
 public:
  IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs,
            const framework::VariableNameMap &outputs,
            const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  // Looks up the input and output variables in `scope`, resizes the output to
  // a single element and writes the emptiness flag into it. The output buffer
  // is requested for CPUPlace; `dev_ctx` is not used by this method.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    // Input: must exist in the scope and is read as a LoDTensor.
    auto *var = scope.FindVar(Input(kInput));
    PADDLE_ENFORCE_NOT_NULL(var);
    auto &input_tensor = var->Get<framework::LoDTensor>();

    // Output: must exist in the scope; it is shaped as {1}.
    auto *out = scope.FindVar(Output(kOutput));
    PADDLE_ENFORCE_NOT_NULL(out);
    auto *result = out->GetMutable<framework::LoDTensor>();
    result->Resize({1});

    // The tensor counts as empty when the product of its dims is zero.
    const bool is_empty = framework::product(input_tensor.dims()) == 0;
    bool *result_data = result->mutable_data<bool>(platform::CPUPlace());
    result_data[0] = is_empty;
  }
};
// Declares the input slot, the output slot and the user-facing documentation
// of the is_empty operator.
class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  IsEmptyOpProtoMaker(framework::OpProto *proto,
                      framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(kInput, "(Tensor) Tensor which is to be checked.");
    AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not.");
    // The documented formula has to agree with IsEmptyOp::Run, which stores
    // `product(dims) == 0`; the text previously stated `> 0`, i.e. the
    // opposite of what the operator returns.
    AddComment(R"DOC(
IsEmpty Operator which checks whether a tensor is empty.
It will just return product(tensor.ddims()) == 0;
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Register is_empty through the "without gradient" registration macro.
REGISTER_OP_WITHOUT_GRADIENT(is_empty, paddle::operators::IsEmptyOp,
                             paddle::operators::IsEmptyOpProtoMaker);
paddle/operators/math/CMakeLists.txt
浏览文件 @
94e86897
add_subdirectory
(
detail
)
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
framework_proto
)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor
)
nv_library
(
selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor
)
...
...
@@ -15,7 +15,7 @@ if(WITH_GPU)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function
)
else
()
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
)
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
framework_proto
)
cc_library
(
selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function
)
cc_library
(
softmax SRCS softmax.cc DEPS device_context
)
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
...
...
paddle/operators/math/im2col.cu
浏览文件 @
94e86897
...
...
@@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width,
if
(
index
<
n
)
{
T
val
=
0
;
int
w
=
index
%
im_width
;
int
h
=
(
index
/
im_width
)
%
im_height
;
int
w
=
index
%
im_width
+
padding_width
;
int
h
=
(
index
/
im_width
)
%
im_height
+
padding_height
;
int
c
=
index
/
(
im_width
*
im_height
);
// compute the start and end of the output
...
...
paddle/operators/math/math_function.cc
浏览文件 @
94e86897
...
...
@@ -250,6 +250,8 @@ void axpy<platform::CPUPlace, double>(const platform::DeviceContext& context,
template
struct
SetConstant
<
platform
::
CPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
int64_t
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
bool
>;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUPlace, float, RANK>; \
...
...
paddle/operators/math/math_function.cu
浏览文件 @
94e86897
...
...
@@ -256,6 +256,8 @@ void axpy<platform::GPUPlace, double>(const platform::DeviceContext& context,
template
struct
SetConstant
<
platform
::
GPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
int64_t
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
bool
>;
#define DEFINE_GPU_TRANS(RANK) \
template struct Transpose<platform::GPUPlace, float, RANK>; \
...
...
paddle/operators/sum_op.cc
浏览文件 @
94e86897
...
...
@@ -12,6 +12,7 @@ limitations under the License. */
#include "paddle/operators/sum_op.h"
#include <vector>
#include "paddle/framework/var_type_inference.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -59,7 +60,8 @@ class SumOp : public framework::OperatorWithKernel {
x_vars
[
0
]
->
Get
<
framework
::
SelectedRows
>
().
value
().
type
()),
ctx
.
device_context
());
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
auto
&
array
=
x_vars
[
0
]
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
auto
&
x_var
:
x_vars
)
{
auto
&
array
=
x_var
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
auto
&
each
:
array
)
{
if
(
each
.
numel
()
!=
0
)
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
each
.
type
()),
...
...
@@ -67,6 +69,8 @@ class SumOp : public framework::OperatorWithKernel {
}
}
}
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
}
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
x_vars
[
0
]
->
Type
().
name
());
}
...
...
@@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
auto
&
inputs
=
op_desc
.
Input
(
"X"
);
auto
var_type
=
framework
::
VarDesc
::
SELECTED_ROWS
;
for
(
auto
&
name
:
op_desc
.
Input
(
"X"
))
{
VLOG
(
10
)
<<
name
<<
" "
<<
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
();
}
bool
any_input_is_lod_tensor
=
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
block
](
const
std
::
string
&
name
)
{
return
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
()
==
...
...
@@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
});
auto
is_tensor_array
=
[
block
](
const
std
::
string
&
name
)
{
return
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
()
==
return
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
name
)).
GetType
()
==
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
};
...
...
@@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
is_tensor_array
);
if
(
any_input_is_tensor_array
)
{
PADDLE_ENFORCE
(
all_inputs_are_tensor_array
);
if
(
!
all_inputs_are_tensor_array
)
{
std
::
ostringstream
os
;
for
(
auto
&
each
:
inputs
)
{
os
<<
" "
<<
each
<<
" type is "
<<
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
each
)).
GetType
()
<<
"
\n
"
;
}
PADDLE_ENFORCE
(
all_inputs_are_tensor_array
,
"Not all inputs are tensor array:
\n
%s"
,
os
.
str
());
}
var_type
=
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
}
else
if
(
any_input_is_lod_tensor
)
{
var_type
=
framework
::
VarDesc
::
LOD_TENSOR
;
}
auto
out_var_name
=
op_desc
.
Output
(
"Out"
).
front
();
block
->
FindRecursiveOrCreateVar
(
out_var_name
)
->
SetType
(
var_type
);
auto
&
out_var
=
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
out_var_name
));
out_var
.
SetType
(
var_type
);
auto
&
in_var
=
detail
::
Ref
(
block
->
FindVarRecursive
(
inputs
.
front
()));
out_var
.
SetDataType
(
in_var
.
GetDataType
());
}
};
...
...
paddle/operators/tensor_array_read_write_op.cc
浏览文件 @
94e86897
...
...
@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/array_operator.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp {
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensorArray
>
();
if
(
offset
>=
out
->
size
())
{
VLOG
(
10
)
<<
"Resize "
<<
Output
(
"Out"
)
<<
" from "
<<
out
->
size
()
<<
" to "
<<
offset
+
1
;
out
->
resize
(
offset
+
1
);
}
auto
*
out_tensor
=
&
out
->
at
(
offset
);
...
...
@@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
public:
void
operator
()(
const
framework
::
OpDescBind
&
op_desc
,
framework
::
BlockDescBind
*
block
)
const
override
{
for
(
auto
&
out_var
:
op_desc
.
OutputArgumentNames
())
{
VLOG
(
10
)
<<
"Set Variable "
<<
out_var
<<
" as LOD_TENSOR_ARRAY"
;
block
->
FindRecursiveOrCreateVar
(
out_var
)
->
SetType
(
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
);
}
auto
x_name
=
op_desc
.
Input
(
"X"
)[
0
];
auto
out_name
=
op_desc
.
Output
(
"Out"
)[
0
];
VLOG
(
10
)
<<
"Set Variable "
<<
out_name
<<
" as LOD_TENSOR_ARRAY"
;
auto
&
out
=
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
out_name
),
"Cannot found %s"
,
out_name
);
out
.
SetType
(
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
);
auto
&
x
=
detail
::
Ref
(
block
->
FindVarRecursive
(
x_name
),
"Cannot found %s"
,
x_name
);
out
.
SetDataType
(
x
.
GetDataType
());
}
};
...
...
@@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp {
auto
&
x_array
=
x
->
Get
<
framework
::
LoDTensorArray
>
();
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
));
PADDLE_ENFORCE
(
out
!=
nullptr
,
"Out must be set"
);
auto
*
out_te
sn
or
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
out_te
ns
or
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
size_t
offset
=
GetOffset
(
scope
,
dev_ctx
);
PADDLE_ENFORCE_LT
(
offset
,
x_array
.
size
());
out_te
sn
or
->
CopyFrom
(
x_array
[
offset
],
dev_ctx
.
GetPlace
(),
dev_ctx
);
out_te
sn
or
->
set_lod
(
x_array
[
offset
].
lod
());
out_te
ns
or
->
CopyFrom
(
x_array
[
offset
],
dev_ctx
.
GetPlace
(),
dev_ctx
);
out_te
ns
or
->
set_lod
(
x_array
[
offset
].
lod
());
}
};
...
...
paddle/operators/while_op.cc
浏览文件 @
94e86897
...
...
@@ -14,8 +14,10 @@
#include <vector>
#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor;
constexpr
char
kStepBlock
[]
=
"step_block"
;
constexpr
char
kCondition
[]
=
"Condition"
;
constexpr
char
kStepScopes
[]
=
"StepScopes"
;
constexpr
char
kParamGrads
[]
=
"X@Grad"
;
constexpr
char
kParameters
[]
=
"X"
;
constexpr
char
kParamGrads
[]
=
"X@GRAD"
;
constexpr
char
kOutputs
[]
=
"Out"
;
class
WhileOp
:
public
framework
::
OperatorBase
{
public:
...
...
@@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
kCondition
,
"(Bool) An scalar. When it's False, the While Op will be terminated."
)
.
AsDuplicable
();
AddOutput
(
"Out"
,
AddOutput
(
kOutputs
,
"A set of variables, which will be assigned with values "
"generated by perators inside the block of While Op."
)
"generated by
the o
perators inside the block of While Op."
)
.
AsDuplicable
();
AddOutput
(
kStepScopes
,
"(StepScopeVar) A vector of local scope, which size equals the "
...
...
@@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase {
auto
*
step_scopes
=
scope
.
FindVar
(
Input
(
kStepScopes
))
->
GetMutable
<
StepScopeVar
>
();
auto
outside_og_names
=
Inputs
(
framework
::
GradVarName
(
kOutputs
));
auto
inside_og_names
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"original_output_grad"
);
PADDLE_ENFORCE_EQ
(
outside_og_names
.
size
(),
inside_og_names
.
size
());
for
(
auto
cur_scope_iter
=
step_scopes
->
rbegin
();
cur_scope_iter
!=
step_scopes
->
rend
();
++
cur_scope_iter
)
{
VLOG
(
3
)
<<
"Start backward at time_step "
<<
cur_scope_iter
-
step_scopes
->
rbegin
();
framework
::
Scope
&
cur_scope
=
**
cur_scope_iter
;
// Link OG from outside to inside
for
(
size_t
i
=
0
;
i
<
outside_og_names
.
size
();
++
i
)
{
auto
outside_og_name
=
outside_og_names
[
i
];
auto
inside_og_name
=
inside_og_names
[
i
];
VLOG
(
10
)
<<
"Linking outside "
<<
outside_og_name
<<
" --> inside "
<<
inside_og_name
;
auto
&
og_outside
=
detail
::
Ref
(
scope
.
FindVar
(
outside_og_name
));
auto
&
og_inside
=
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
));
if
(
og_outside
.
Type
().
hash_code
()
==
typeid
(
framework
::
LoDTensor
).
hash_code
())
{
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
ShareDataWith
(
outside_tensor
);
}
else
if
(
og_outside
.
Type
().
hash_code
()
==
typeid
(
framework
::
LoDTensorArray
).
hash_code
())
{
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
VLOG
(
10
)
<<
outside_og_name
<<
" size = "
<<
outside_array
.
size
();
inside_array
.
resize
(
outside_array
.
size
());
for
(
size_t
j
=
0
;
j
<
inside_array
.
size
();
++
j
)
{
VLOG
(
10
)
<<
j
<<
" "
<<
outside_array
[
j
].
numel
();
if
(
outside_array
[
j
].
numel
()
!=
0
)
{
inside_array
[
j
].
set_lod
(
outside_array
[
j
].
lod
());
inside_array
[
j
].
ShareDataWith
(
outside_array
[
j
]);
}
else
{
PADDLE_ENFORCE_EQ
(
inside_array
[
j
].
numel
(),
0
);
}
}
}
}
executor
.
Run
(
*
program
,
*
cur_scope_iter
,
block
->
ID
(),
false
);
auto
&
pg_names
=
Outputs
(
kParamGrads
);
auto
&
p_names
=
Inputs
(
kParameters
);
PADDLE_ENFORCE_EQ
(
pg_names
.
size
(),
p_names
.
size
());
for
(
size_t
prog_id
=
0
;
prog_id
<
pg_names
.
size
();
++
prog_id
)
{
auto
inside_grad_name
=
framework
::
GradVarName
(
p_names
[
prog_id
]);
for
(
size_t
param_id
=
0
;
param_id
<
pg_names
.
size
();
++
param_id
)
{
if
(
pg_names
[
param_id
]
==
framework
::
kEmptyVarName
)
{
continue
;
// iterator doesn't have gradient
}
auto
inside_grad_name
=
framework
::
GradVarName
(
p_names
[
param_id
]);
// // TODO(tonyyang-s
avil
: Not sure we need the following
// // TODO(tonyyang-s
vail)
: Not sure we need the following
// // If does not compute gradient of that variable inside rnn,
// just
// // continue
...
...
@@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase {
// zero gradient variable in step 0
if
(
cur_scope_iter
==
step_scopes
->
rbegin
())
{
auto
*
var
=
(
*
cur_scope_iter
)
->
FindVar
(
inside_grad_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"Can not find var %s"
,
inside_grad_name
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
framework
::
AttributeMap
attrs
;
...
...
@@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase {
attrs
[
"value"
]
=
0.0
f
;
auto
zero_op
=
framework
::
OpRegistry
::
CreateOp
(
"fill_constant"
,
{},
{{
"Out"
,
{
pg_names
[
p
rog
_id
]}}},
attrs
);
"fill_constant"
,
{},
{{
"Out"
,
{
pg_names
[
p
aram
_id
]}}},
attrs
);
zero_op
->
Run
(
scope
,
dev_ctx
);
}
}
// sum gradient
auto
*
outside_var
=
scope
.
FindVar
(
pg_names
[
prog_id
]);
PADDLE_ENFORCE_NOT_NULL
(
outside_var
);
auto
&
outside_tensor
=
*
outside_var
->
GetMutable
<
framework
::
LoDTensor
>
();
std
::
string
result_var_name
;
auto
*
local_result_var
=
(
*
cur_scope_iter
)
->
Var
(
&
result_var_name
);
auto
&
local_result_tensor
=
*
local_result_var
->
GetMutable
<
framework
::
LoDTensor
>
();
local_result_tensor
.
ShareDataWith
(
outside_tensor
);
auto
new_inside_name
=
cur_scope
.
Rename
(
inside_grad_name
);
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
result_var_name
,
inside_grad_name
}}},
{{
"Out"
,
{
result_var_name
}}},
{});
sum_op
->
Run
(
**
cur_scope_iter
,
dev_ctx
);
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
{{
"Out"
,
{
pg_names
[
param_id
]}}},
{});
sum_op
->
Run
(
cur_scope
,
dev_ctx
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
}
}
}
...
...
@@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
virtual
std
::
unique_ptr
<
framework
::
OpDescBind
>
Apply
()
const
{
auto
*
grad
=
new
framework
::
OpDescBind
();
grad
->
SetType
(
"while_grad"
);
for
(
auto
&
input_param
:
this
->
InputNames
())
{
grad
->
SetInput
(
input_param
,
this
->
Input
(
input_param
));
grad
->
SetOutput
(
framework
::
GradVarName
(
input_param
),
this
->
InputGrad
(
input_param
));
grad
->
SetInput
(
kParameters
,
Input
(
kParameters
));
grad
->
SetOutput
(
framework
::
GradVarName
(
kParameters
),
InputGrad
(
kParameters
,
/*do not drop empty gradient*/
false
));
grad
->
SetInput
(
kOutputs
,
Output
(
kOutputs
));
// OG should be re-calculated by step blocks, since many outputs of while op
// do not need to calculate gradients.
std
::
unordered_set
<
std
::
string
>
block_ins
;
{
for
(
auto
&
p
:
Input
(
kParameters
))
{
block_ins
.
insert
(
p
);
}
for
(
auto
&
o
:
Output
(
kOutputs
))
{
block_ins
.
insert
(
o
);
}
}
std
::
unordered_set
<
std
::
string
>
extra_inputs
;
for
(
size_t
i
=
0
;
i
<
grad_block_
[
0
]
->
OpSize
();
++
i
)
{
for
(
auto
&
input_name
:
grad_block_
[
0
]
->
Op
(
i
)
->
InputArgumentNames
())
{
if
(
block_ins
.
find
(
input_name
)
!=
block_ins
.
end
())
{
continue
;
}
extra_inputs
.
insert
(
input_name
);
}
for
(
auto
&
output_param
:
this
->
OutputNames
())
{
grad
->
SetInput
(
output_param
,
this
->
Output
(
output_param
));
if
(
output_param
!=
kStepScopes
)
{
grad
->
SetInput
(
framework
::
GradVarName
(
output_param
),
this
->
OutputGrad
(
output_param
));
for
(
auto
&
output_name
:
grad_block_
[
0
]
->
Op
(
i
)
->
OutputArgumentNames
())
{
block_ins
.
insert
(
output_name
);
}
}
std
::
vector
<
std
::
string
>
extra_inputs_list
;
extra_inputs_list
.
resize
(
extra_inputs
.
size
());
std
::
copy
(
extra_inputs
.
begin
(),
extra_inputs
.
end
(),
extra_inputs_list
.
begin
());
grad
->
SetInput
(
framework
::
GradVarName
(
kOutputs
),
extra_inputs_list
);
grad
->
SetInput
(
kStepScopes
,
Output
(
kStepScopes
));
grad
->
SetAttrMap
(
this
->
Attrs
());
grad
->
SetBlockAttr
(
kStepBlock
,
*
grad_block_
[
0
]);
// record the original output gradient names, since the gradient name of
// while operator could be renamed.
grad
->
SetAttr
(
"original_output_grad"
,
extra_inputs_list
);
return
std
::
unique_ptr
<
framework
::
OpDescBind
>
(
grad
);
}
};
// Variable-type inference for while_grad: every parameter-gradient variable
// that can be found takes over the type and data type of its parameter.
class WhileGradOpVarTypeInference : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDescBind &op_desc,
                  framework::BlockDescBind *block) const override {
    auto param_names = op_desc.Input(kParameters);
    auto grad_names = op_desc.Output(framework::GradVarName(kParameters));

    for (size_t idx = 0; idx < param_names.size(); ++idx) {
      // The parameter itself must exist; Ref enforces a non-null pointer.
      auto &param_var =
          detail::Ref(block->FindVarRecursive(param_names[idx]));
      auto *grad_var = block->FindVarRecursive(grad_names[idx]);
      if (grad_var == nullptr) {
        // Gradient could be @EMPTY@, in which case there is nothing to set.
        continue;
      }

      VLOG(5) << "Setting " << grad_names[idx] << " following "
              << param_names[idx] << " type: " << param_var.GetType();
      grad_var->SetType(param_var.GetType());
      grad_var->SetDataType(param_var.GetDataType());
    }
  }
};
// Shape inference for while_grad: each non-empty parameter gradient whose
// parameter is a LOD_TENSOR or a LOD_TENSOR_ARRAY receives the dims of that
// parameter.
class WhileGradOpShapeInference : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    // Presence queries on the operator's slots; their return values are
    // discarded here.
    ctx->HasInputs(kParameters);
    ctx->HasOutputs(framework::GradVarName(kParameters));
    ctx->HasInputs(kOutputs);
    ctx->HasInputs(framework::GradVarName(kOutputs));

    auto p_names = ctx->Inputs(kParameters);
    auto pg_names = ctx->Outputs(kParamGrads);
    auto dims = ctx->GetInputsDim(kParameters);
    auto var_types = ctx->GetInputsVarType(kParameters);

    std::vector<std::string> names_to_set;
    std::vector<framework::DDim> dims_to_set;
    for (size_t idx = 0; idx < p_names.size(); ++idx) {
      // Parameters whose gradient slot holds the empty-variable name are
      // skipped.
      if (pg_names[idx] == framework::kEmptyVarName) {
        continue;
      }

      // Both supported variable kinds are handled the same way. For
      // LOD_TENSOR_ARRAY it is not clear how the dim should be set, so the
      // parameter's dim is copied just as for LOD_TENSOR.
      const bool copies_dim =
          var_types[idx] == framework::VarDesc::LOD_TENSOR ||
          var_types[idx] == framework::VarDesc::LOD_TENSOR_ARRAY;
      if (!copies_dim) {
        continue;
      }
      names_to_set.push_back(pg_names[idx]);
      dims_to_set.push_back(dims[idx]);
    }
    ctx->SetDims(names_to_set, dims_to_set);
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
while
,
paddle
::
operators
::
WhileOp
,
paddle
::
operators
::
WhileOpMaker
,
paddle
::
operators
::
WhileGradOpDescMaker
);
REGISTER_OPERATOR
(
while_grad
,
paddle
::
operators
::
WhileGradOp
,
paddle
::
operators
::
WhileGradOpShapeInference
,
paddle
::
operators
::
WhileGradOpVarTypeInference
);
paddle/scripts/docker/README.md
浏览文件 @
94e86897
...
...
@@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
|
`WITH_GPU`
| OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
|
`WITH_AVX`
| OFF | Set to "ON" to enable AVX support. |
|
`WITH_TESTING`
| ON | Build unit tests binaries. |
|
`WITH_MKLDNN`
| ON | Build with
[
Intel® MKL DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_MKLML`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
support. |
|
`WITH_MKL`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
and
[
Intel® MKL-DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_GOLANG`
| ON | Build fault-tolerant parameter server written in go. |
|
`WITH_SWIG_PY`
| ON | Build with SWIG python API support. |
|
`WITH_C_API`
| OFF | Build capi libraries for inference. |
...
...
paddle/scripts/docker/build.sh
浏览文件 @
94e86897
...
...
@@ -34,9 +34,7 @@ function cmake_gen() {
${
PYTHON_FLAGS
}
-DWITH_DOC=OFF
-DWITH_GPU=
${
WITH_GPU
:-
OFF
}
-DCUDA_ARCH_NAME=All
-DWITH_MKLDNN=
${
WITH_MKLDNN
:-
ON
}
-DWITH_MKLML=
${
WITH_MKLML
:-
ON
}
-DWITH_MKL=
${
WITH_MKL
:-
ON
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
ON
}
-DWITH_SWIG_PY=ON
...
...
@@ -57,9 +55,7 @@ EOF
${
PYTHON_FLAGS
}
\
-DWITH_DOC
=
OFF
\
-DWITH_GPU
=
${
WITH_GPU
:-
OFF
}
\
-DCUDA_ARCH_NAME
=
All
\
-DWITH_MKLDNN
=
${
WITH_MKLDNN
:-
ON
}
\
-DWITH_MKLML
=
${
WITH_MKLML
:-
ON
}
\
-DWITH_MKL
=
${
WITH_MKL
:-
ON
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
ON
}
\
-DWITH_SWIG_PY
=
${
WITH_SWIG_PY
:-
ON
}
\
...
...
paddle/scripts/submit_local.sh.in
浏览文件 @
94e86897
...
...
@@ -18,8 +18,8 @@ function version(){
echo
"PaddlePaddle @PADDLE_VERSION@, compiled with"
echo
" with_avx: @WITH_AVX@"
echo
" with_gpu: @WITH_GPU@"
echo
" with_mkl: @WITH_MKL@"
echo
" with_mkldnn: @WITH_MKLDNN@"
echo
" with_mklml: @WITH_MKLML@"
echo
" with_double: @WITH_DOUBLE@"
echo
" with_python: @WITH_PYTHON@"
echo
" with_rdma: @WITH_RDMA@"
...
...
@@ -45,8 +45,8 @@ function ver2num() {
function
cpu_config
()
{
# auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
# only when MKL
DNN or MKLML
enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
# only when MKL enabled
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
fi
ht
=
`
lscpu |grep
"per core"
|awk
-F
':'
'{print $2}'
|xargs
`
...
...
@@ -70,8 +70,8 @@ function cpu_config() {
function
threads_config
()
{
# auto set OMP_NUM_THREADS and MKL_NUM_THREADS
# according to trainer_count and total processors
# only when MKL
DNN or MKLML
enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
# only when MKL enabled
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
fi
processors
=
`
grep
"processor"
/proc/cpuinfo|sort
-u
|wc
-l
`
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
94e86897
...
...
@@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
DNN
=
OFF
-DWITH_MKLML
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
...
...
paddle/trainer/Trainer.cpp
浏览文件 @
94e86897
...
...
@@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
}
}
if
(
FLAGS_use_mkldnn
)
{
CHECK_EQ
(
FLAGS_trainer_count
,
1UL
)
<<
"MKLDNN only need 1 trainer"
;
}
if
(
testing
)
{
LOG
(
INFO
)
<<
"trainer: in testing mode"
;
if
(
config_
->
getOptConfig
().
use_sparse_remote_updater
()
||
...
...
python/paddle/v2/fluid/framework.py
浏览文件 @
94e86897
...
...
@@ -12,9 +12,9 @@ def unique_name(prefix):
return
"_"
.
join
([
prefix
,
str
(
uid
)])
def
_debug_string_
(
proto
):
def
_debug_string_
(
proto
,
throw_on_error
=
True
):
error_fields
=
list
()
if
not
proto
.
IsInitialized
(
error_fields
):
if
not
proto
.
IsInitialized
(
error_fields
)
and
throw_on_error
:
raise
ValueError
(
"{0} are not initialized
\n
The message is {1}"
.
format
(
error_fields
,
proto
))
return
proto
.
__str__
()
...
...
@@ -101,9 +101,12 @@ class Variable(object):
self
.
stop_gradient
=
stop_gradient
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
VarDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
__repr__
=
__str__
...
...
@@ -291,10 +294,13 @@ class Operator(object):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
def
__str__
(
self
):
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
OpDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
def
__str__
(
self
):
return
self
.
to_string
(
True
)
__repr__
=
__str__
...
...
@@ -349,9 +355,12 @@ class Block(object):
self
.
program
=
program
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
BlockDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
__repr__
=
__str__
...
...
@@ -454,9 +463,12 @@ class Program(object):
self
.
current_block_idx
=
0
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
def
clone
(
self
):
p
=
Program
()
...
...
@@ -512,7 +524,14 @@ class Program(object):
assert
isinstance
(
target
,
Variable
)
if
no_grad_set
is
None
:
no_grad_set
=
set
()
param_to_grad_info
=
self
.
desc
.
append_backward
(
target
.
desc
,
no_grad_set
)
try
:
param_to_grad_info
=
self
.
desc
.
append_backward
(
target
.
desc
,
no_grad_set
)
except
Exception
as
e
:
raise
core
.
EnforceNotMet
(
str
(
e
)
+
"
\n
Current protobuf is
\n
{0}"
.
format
(
self
.
to_string
(
False
)))
self
.
sync_with_cpp
()
return
param_to_grad_info
...
...
python/paddle/v2/fluid/net_drawer.py
浏览文件 @
94e86897
...
...
@@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs):
if
not
var_dict
.
has_key
(
var
):
var_dict
[
var
]
=
"Feed"
temp_id
=
0
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
program
.
desc
.
serialize_to_string
())
for
block
in
proto
.
blocks
:
for
op
in
block
.
ops
:
op
.
type
=
op
.
type
+
"_"
+
str
(
temp_id
)
temp_id
+=
1
graph
.
node
(
**
draw_node
(
op
))
for
o
in
op
.
outputs
:
for
arg
in
o
.
arguments
:
...
...
@@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
for
arg
in
e
.
arguments
:
if
var_dict
.
has_key
(
arg
):
graph
.
edge
(
**
draw_edge
(
var_dict
,
op
,
e
,
arg
))
break
# only plot the first block
def
draw_graph
(
startup_program
,
main_program
,
**
kwargs
):
...
...
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistable
s
import
paddle.v2.fluid.layers
as
layer
s
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistables
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
import
numpy
as
np
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
y_predict
=
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
y_predict
=
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
cost
=
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
cost
=
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
20
...
...
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.optimizer
as
optimizer
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
resnet_cifar10
(
input
,
depth
=
32
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
tmp
=
layers
.
conv2d
(
input
=
input
,
filter_size
=
filter_size
,
...
...
@@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32):
padding
=
padding
,
act
=
None
,
bias_attr
=
False
)
return
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
return
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
def
shortcut
(
input
,
ch_in
,
ch_out
,
stride
,
program
,
init_program
):
if
ch_in
!=
ch_out
:
...
...
@@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32):
else
:
return
input
def
basicblock
(
input
,
ch_in
,
ch_out
,
stride
):
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
def
basicblock
(
input
,
ch_in
,
ch_out
,
stride
):
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
short
=
shortcut
(
input
,
ch_in
,
ch_out
,
stride
)
return
layers
.
elementwise_add
(
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
return
layers
.
elementwise_add
(
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
def
layer_warp
(
block_func
,
input
,
ch_in
,
ch_out
,
count
,
stride
):
tmp
=
block_func
(
input
,
ch_in
,
ch_out
,
stride
)
...
...
@@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32):
assert
(
depth
-
2
)
%
6
==
0
n
=
(
depth
-
2
)
/
6
conv1
=
conv_bn_layer
(
input
=
input
,
ch_out
=
16
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
)
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
input
=
input
,
ch_out
=
16
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
)
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
pool
=
layers
.
pool2d
(
input
=
res3
,
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
input
=
res3
,
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
return
pool
def
vgg16_bn_drop
(
input
):
def
conv_block
(
input
,
num_filter
,
groups
,
dropouts
):
def
conv_block
(
input
,
num_filter
,
groups
,
dropouts
):
return
nets
.
img_conv_group
(
input
=
input
,
pool_size
=
2
,
...
...
@@ -123,22 +71,14 @@ def vgg16_bn_drop(input):
conv4
=
conv_block
(
conv3
,
512
,
3
,
[
0.4
,
0.4
,
0
])
conv5
=
conv_block
(
conv4
,
512
,
3
,
[
0.4
,
0.4
,
0
])
drop
=
layers
.
dropout
(
x
=
conv5
,
dropout_prob
=
0.5
)
drop
=
layers
.
dropout
(
x
=
conv5
,
dropout_prob
=
0.5
)
fc1
=
layers
.
fc
(
input
=
drop
,
size
=
512
,
act
=
None
,
param_attr
=
{
"initializer"
:
XavierInitializer
()})
reshape1
=
layers
.
reshape
(
x
=
fc1
,
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
reshape1
=
layers
.
reshape
(
x
=
fc1
,
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
fc2
=
layers
.
fc
(
input
=
drop2
,
size
=
512
,
act
=
None
,
...
...
@@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
# optimizer =
optimizer.
SGDOptimizer(learning_rate=0.001)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.001
)
# optimizer = SGDOptimizer(learning_rate=0.001)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.001
)
opts
=
optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
128
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
conv_pool_1
=
nets
.
simple_img_conv_pool
(
input
=
images
,
filter_size
=
5
,
...
...
@@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool(
pool_stride
=
2
,
act
=
"relu"
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
opts
=
optimizer
.
minimize
(
avg_cost
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
BATCH_SIZE
=
50
PASS_NUM
=
3
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
from
paddle.v2.fluid.initializer
import
UniformInitializer
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
MomentumOptimizer
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
BATCH_SIZE
=
128
image
=
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
image
=
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
param_attr
=
{
'name'
:
None
,
...
...
@@ -22,32 +18,21 @@ param_attr = {
'regularization'
:
L2DecayRegularizer
(
0.0005
*
BATCH_SIZE
)
}
hidden1
=
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden1
=
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
predict
=
layers
.
fc
(
input
=
hidden2
,
size
=
10
,
act
=
'softmax'
,
param_attr
=
param_attr
)
label
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
label
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
optimizer
=
optimizer
.
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
optimizer
=
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
opts
=
optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/book/test_recommender_system.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
IS_SPARSE
=
True
USE_GPU
=
False
...
...
@@ -19,10 +18,7 @@ def get_usr_combined_features():
USR_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_user_id
()
+
1
uid
=
layers
.
data
(
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
uid
=
layers
.
data
(
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_emb
=
layers
.
embedding
(
input
=
uid
,
...
...
@@ -31,15 +27,11 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'user_table'
},
is_sparse
=
IS_SPARSE
)
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
size
=
32
)
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
size
=
32
)
USR_GENDER_DICT_SIZE
=
2
usr_gender_id
=
layers
.
data
(
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_gender_id
=
layers
.
data
(
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_gender_emb
=
layers
.
embedding
(
input
=
usr_gender_id
,
...
...
@@ -47,14 +39,10 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'gender_table'
},
is_sparse
=
IS_SPARSE
)
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
size
=
16
)
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
size
=
16
)
USR_AGE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
age_table
)
usr_age_id
=
layers
.
data
(
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_age_id
=
layers
.
data
(
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_age_emb
=
layers
.
embedding
(
input
=
usr_age_id
,
...
...
@@ -62,14 +50,10 @@ def get_usr_combined_features():
is_sparse
=
IS_SPARSE
,
param_attr
=
{
'name'
:
'age_table'
})
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
size
=
16
)
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
size
=
16
)
USR_JOB_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_job_id
()
+
1
usr_job_id
=
layers
.
data
(
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_job_id
=
layers
.
data
(
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_job_emb
=
layers
.
embedding
(
input
=
usr_job_id
,
...
...
@@ -77,16 +61,12 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'job_table'
},
is_sparse
=
IS_SPARSE
)
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
size
=
16
)
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
size
=
16
)
concat_embed
=
layers
.
concat
(
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
axis
=
1
)
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
axis
=
1
)
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
return
usr_combined_features
...
...
@@ -95,10 +75,7 @@ def get_mov_combined_features():
MOV_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_movie_id
()
+
1
mov_id
=
layers
.
data
(
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_id
=
layers
.
data
(
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_emb
=
layers
.
embedding
(
input
=
mov_id
,
...
...
@@ -107,36 +84,24 @@ def get_mov_combined_features():
param_attr
=
{
'name'
:
'movie_table'
},
is_sparse
=
IS_SPARSE
)
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
size
=
32
)
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
size
=
32
)
CATEGORY_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
movie_categories
())
category_id
=
layers
.
data
(
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
category_id
=
layers
.
data
(
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_categories_emb
=
layers
.
embedding
(
input
=
category_id
,
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
input
=
category_id
,
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_categories_hidden
=
layers
.
sequence_pool
(
input
=
mov_categories_emb
,
pool_type
=
"sum"
)
input
=
mov_categories_emb
,
pool_type
=
"sum"
)
MOV_TITLE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
get_movie_title_dict
())
mov_title_id
=
layers
.
data
(
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_title_id
=
layers
.
data
(
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_title_emb
=
layers
.
embedding
(
input
=
mov_title_id
,
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
input
=
mov_title_id
,
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_title_conv
=
nets
.
sequence_conv_pool
(
input
=
mov_title_emb
,
...
...
@@ -146,13 +111,10 @@ def get_mov_combined_features():
pool_type
=
"sum"
)
concat_embed
=
layers
.
concat
(
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
axis
=
1
)
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
axis
=
1
)
# FIXME(dzh) : need tanh operator
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
return
mov_combined_features
...
...
@@ -162,18 +124,11 @@ def model():
mov_combined_features
=
get_mov_combined_features
()
# need cos sim
inference
=
layers
.
cos_sim
(
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
inference
=
layers
.
cos_sim
(
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
label
=
layers
.
data
(
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
square_cost
=
layers
.
square_error_cost
(
input
=
inference
,
label
=
label
)
square_cost
=
layers
.
square_error_cost
(
input
=
inference
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
square_cost
)
...
...
@@ -182,7 +137,7 @@ def model():
def
main
():
cost
=
model
()
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.2
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.2
)
opts
=
sgd_optimizer
.
minimize
(
cost
)
if
USE_GPU
:
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
convolution_net
(
input_dim
,
class_dim
=
2
,
emb_dim
=
32
,
hid_dim
=
32
):
...
...
@@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
stacked_lstm_net
(
input_dim
,
...
...
@@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
lstm_net
(
dict_dim
,
class_dim
=
2
,
emb_dim
=
32
,
seq_len
=
80
,
batch_size
=
50
):
...
...
@@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
python/paddle/v2/fluid/tests/book/test_word2vec.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
PASS_NUM
=
100
EMBED_SIZE
=
32
...
...
@@ -17,26 +16,11 @@ IS_SPARSE = True
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
dict_size
=
len
(
word_dict
)
first_word
=
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
data_type
=
'int64'
)
second_word
=
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
first_word
=
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
data_type
=
'int64'
)
second_word
=
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
embed_first
=
layers
.
embedding
(
input
=
first_word
,
...
...
@@ -64,19 +48,12 @@ embed_forth = layers.embedding(
param_attr
=
{
'name'
:
'shared_w'
})
concat_embed
=
layers
.
concat
(
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
axis
=
1
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
axis
=
1
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
94e86897
...
...
@@ -110,13 +110,30 @@ class TestConv2dOp(OpTest):
self
.
op_type
=
"conv2d"
class
TestWithPad
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithStride
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
input_size
=
[
2
,
3
,
6
,
6
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithGroup
(
TestConv2dOp
):
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWith1x1
(
TestConv2dOp
):
def
init_test_case
(
self
):
...
...
@@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
...
...
@@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp):
def
init_group
(
self
):
self
.
groups
=
3
#----------------Conv2dCudnn----------------
class
TestCudnn
(
TestConv2dOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv
2d
"
self
.
op_type
=
"conv
_cudnn
"
#----------------Conv2dCudnn----------------
class
TestCudnnWithPad
(
TestWithPad
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnn
(
TestConv2dOp
):
class
TestCudnn
WithStride
(
TestWithStride
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
...
...
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
浏览文件 @
94e86897
...
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv2dtranspose_forward_naive
(
input_
,
filter_
,
conv2dtranspose_param
):
# [2, 3, 5, 5]
in_n
,
in_c
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3]
f_c
,
out_c
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
...
...
@@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
j1
,
j2
=
j
*
stride
[
0
],
j
*
stride
[
0
]
+
f_w
out
[
n
,
k
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_h
-
pad
[
0
],
pad
[
1
]:
out_w
-
pad
[
1
]]
return
out
...
...
@@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest):
def
setUp
(
self
):
# init as conv transpose
self
.
init_op_type
()
# [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7]
self
.
init_test_case
()
conv2dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
...
@@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
print
'check output here for'
,
self
.
op_type
self
.
check_output
()
def
test_check_grad_no_input
(
self
):
...
...
@@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest):
self
.
op_type
=
"conv2d_transpose"
class
TestWithPad
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
class
TestWithStride
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
# ------------ test_cudnn ------------
class
TestCudnn
(
TestConv2dTransposeOp
):
def
init_op_type
(
self
):
...
...
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
浏览文件 @
94e86897
...
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
):
# [2, 3, 5, 5, 5]
in_n
,
in_c
,
in_d
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3, 3]
f_c
,
out_c
,
f_d
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
...
...
@@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
out_d
=
(
in_d
-
1
)
*
stride
[
0
]
+
f_d
out_h
=
(
in_h
-
1
)
*
stride
[
1
]
+
f_h
out_w
=
(
in_w
-
1
)
*
stride
[
2
]
+
f_w
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
for
n
in
range
(
in_n
):
...
...
@@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
j1
,
j2
=
j
*
stride
[
2
],
j
*
stride
[
2
]
+
f_w
out
[
n
,
k
,
d1
:
d2
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_d
-
pad
[
0
],
pad
[
1
]:
out_h
-
pad
[
1
],
pad
[
2
]:
out_w
-
pad
[
2
]]
return
out
...
...
@@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest):
def
setUp
(
self
):
# init as conv transpose
self
.
init_op_type
()
# [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
self
.
init_test_case
()
conv3dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
...
@@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest):
filter_
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
).
astype
(
"float32"
)
# print 'deconv output py', output, output.shape
self
.
inputs
=
{
'Input'
:
input_
,
'Filter'
:
filter_
}
self
.
attrs
=
{
...
...
@@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
print
'check output here'
self
.
check_output
()
def
test_check_grad
(
self
):
...
...
@@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NC
D
HW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
...
...
@@ -93,5 +88,25 @@ class TestConv3dTransposeOp(OpTest):
self
.
op_type
=
"conv3d_transpose"
class
TestWithPad
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
class
TestWithStride
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
2
,
2
,
2
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_is_empty_op.py
0 → 100644
浏览文件 @
94e86897
import
unittest
import
numpy
as
np
from
paddle.v2.fluid.op
import
Operator
import
paddle.v2.fluid.core
as
core
def
create_tensor
(
scope
,
name
,
np_data
):
tensor
=
scope
.
var
(
name
).
get_tensor
()
tensor
.
set_dims
(
np_data
.
shape
)
tensor
.
set
(
np_data
,
core
.
CPUPlace
())
return
tensor
class
TestIsEmptyOp
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
scope
=
core
.
Scope
()
# create input variables
np_data0
=
np
.
array
([
0
,
1
,
2
])
create_tensor
(
self
.
scope
,
"X0"
,
np_data0
)
np_data1
=
np
.
array
([
1
])
t
=
create_tensor
(
self
.
scope
,
"X1"
,
np_data1
)
t
.
set_dims
([
0
])
# create output variables
self
.
scope
.
var
(
"out"
)
def
test_no_empty
(
self
):
self
.
one_case
(
"X0"
,
False
)
def
test_empty
(
self
):
self
.
one_case
(
"X1"
,
True
)
def
one_case
(
self
,
input
,
target
):
op
=
Operator
(
type
=
"is_empty"
,
X
=
input
,
Out
=
"out"
)
ctx
=
core
.
DeviceContext
.
create
(
core
.
CPUPlace
())
op
.
run
(
self
.
scope
,
ctx
)
out
=
self
.
scope
.
var
(
"out"
).
get_tensor
()
self
.
assertEqual
(
np
.
array
(
out
)[
0
],
target
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_while_op.py
浏览文件 @
94e86897
...
...
@@ -2,6 +2,7 @@ import unittest
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.core
as
core
from
paddle.v2.fluid.backward
import
append_backward_ops
import
numpy
...
...
@@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase):
i
=
layers
.
zeros
(
shape
=
[
1
],
dtype
=
'int64'
)
i
.
stop_gradient
=
True
init
=
layers
.
zeros
(
shape
=
[
10
],
dtype
=
'float32'
)
mem_array
=
layers
.
array_write
(
init
,
i
=
i
)
mem_array
=
layers
.
array_write
(
x
=
init
,
i
=
i
)
data_array
=
layers
.
array_write
(
x
=
d0
,
i
=
i
)
i
=
layers
.
increment
(
i
)
...
...
@@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase):
i
.
stop_gradient
=
True
array_len
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
3
)
array_len
.
stop_gradient
=
True
cond
=
layers
.
less_than
(
x
=
i
,
y
=
array_len
)
while_op
=
layers
.
While
(
cond
=
cond
)
with
while_op
.
block
():
d
=
layers
.
array_read
(
array
=
data_array
,
i
=
i
)
prev
=
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
i
=
layers
.
increment
(
x
=
i
,
in_place
=
True
)
result
=
layers
.
sums
(
input
=
[
d
,
prev
])
i
=
layers
.
increment
(
x
=
i
,
in_place
=
True
)
layers
.
array_write
(
result
,
i
=
i
,
array
=
mem_array
)
layers
.
less_than
(
x
=
i
,
y
=
array_len
,
cond
=
cond
)
sum_result
=
layers
.
array_read
(
mem_array
,
i
=
array_len
)
sum_result
=
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
loss
=
layers
.
mean
(
x
=
sum_result
)
append_backward_ops
(
loss
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录