Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
94e86897
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
94e86897
编写于
11月 17, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
差异文件
update code and fix conflicts.
上级
082bc7af
3375e3e2
变更
64
隐藏空白更改
内联
并排
Showing
64 changed file
with
753 addition
and
494 deletion
+753
-494
CMakeLists.txt
CMakeLists.txt
+19
-7
cmake/configure.cmake
cmake/configure.cmake
+8
-21
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+3
-5
cmake/cuda.cmake
cmake/cuda.cmake
+0
-1
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+7
-7
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+7
-8
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+4
-0
cmake/util.cmake
cmake/util.cmake
+2
-2
doc/design/mkldnn/README.MD
doc/design/mkldnn/README.MD
+4
-4
doc/howto/dev/write_docs_cn.rst
doc/howto/dev/write_docs_cn.rst
+1
-1
doc/mobile/cross_compiling_for_android_cn.md
doc/mobile/cross_compiling_for_android_cn.md
+1
-1
doc/mobile/cross_compiling_for_ios_cn.md
doc/mobile/cross_compiling_for_ios_cn.md
+6
-6
doc/mobile/cross_compiling_for_raspberry_cn.md
doc/mobile/cross_compiling_for_raspberry_cn.md
+1
-1
paddle/cuda/include/hl_gpu.h
paddle/cuda/include/hl_gpu.h
+2
-0
paddle/framework/backward.cc
paddle/framework/backward.cc
+49
-19
paddle/framework/data_type.h
paddle/framework/data_type.h
+2
-0
paddle/framework/ddim.cc
paddle/framework/ddim.cc
+1
-2
paddle/framework/executor.cc
paddle/framework/executor.cc
+1
-0
paddle/framework/op_desc.cc
paddle/framework/op_desc.cc
+23
-1
paddle/framework/op_desc.h
paddle/framework/op_desc.h
+4
-0
paddle/framework/operator.cc
paddle/framework/operator.cc
+0
-13
paddle/framework/scope.cc
paddle/framework/scope.cc
+2
-1
paddle/framework/shape_inference.h
paddle/framework/shape_inference.h
+4
-3
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+1
-1
paddle/math/Storage.cpp
paddle/math/Storage.cpp
+4
-0
paddle/operators/array_operator.h
paddle/operators/array_operator.h
+1
-0
paddle/operators/bilinear_tensor_product_op.h
paddle/operators/bilinear_tensor_product_op.h
+1
-1
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+1
-6
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+2
-4
paddle/operators/cos_sim_op.h
paddle/operators/cos_sim_op.h
+1
-1
paddle/operators/detail/safe_ref.h
paddle/operators/detail/safe_ref.h
+31
-0
paddle/operators/fill_constant_batch_size_like_op.cc
paddle/operators/fill_constant_batch_size_like_op.cc
+4
-1
paddle/operators/fill_constant_batch_size_like_op.cu.cc
paddle/operators/fill_constant_batch_size_like_op.cu.cc
+4
-1
paddle/operators/fill_zeros_like_op.cc
paddle/operators/fill_zeros_like_op.cc
+5
-2
paddle/operators/fill_zeros_like_op.cu.cc
paddle/operators/fill_zeros_like_op.cu.cc
+5
-2
paddle/operators/is_empty_op.cc
paddle/operators/is_empty_op.cc
+67
-0
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+2
-2
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+2
-2
paddle/operators/math/math_function.cc
paddle/operators/math/math_function.cc
+2
-0
paddle/operators/math/math_function.cu
paddle/operators/math/math_function.cu
+2
-0
paddle/operators/sum_op.cc
paddle/operators/sum_op.cc
+29
-8
paddle/operators/tensor_array_read_write_op.cc
paddle/operators/tensor_array_read_write_op.cc
+15
-9
paddle/operators/while_op.cc
paddle/operators/while_op.cc
+153
-31
paddle/scripts/docker/README.md
paddle/scripts/docker/README.md
+1
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-6
paddle/scripts/submit_local.sh.in
paddle/scripts/submit_local.sh.in
+5
-5
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+1
-1
paddle/trainer/Trainer.cpp
paddle/trainer/Trainer.cpp
+4
-0
python/paddle/v2/fluid/framework.py
python/paddle/v2/fluid/framework.py
+27
-8
python/paddle/v2/fluid/net_drawer.py
python/paddle/v2/fluid/net_drawer.py
+4
-0
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+9
-20
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
...le/v2/fluid/tests/book/test_image_classification_train.py
+20
-80
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
.../paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+9
-20
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
...n/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+10
-25
python/paddle/v2/fluid/tests/book/test_recommender_system.py
python/paddle/v2/fluid/tests/book/test_recommender_system.py
+27
-72
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
...dle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+5
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
...luid/tests/book/test_understand_sentiment_dynamic_lstm.py
+4
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
...dle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+4
-5
python/paddle/v2/fluid/tests/book/test_word2vec.py
python/paddle/v2/fluid/tests/book/test_word2vec.py
+13
-36
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+28
-12
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+21
-5
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+23
-8
python/paddle/v2/fluid/tests/test_is_empty_op.py
python/paddle/v2/fluid/tests/test_is_empty_op.py
+43
-0
python/paddle/v2/fluid/tests/test_while_op.py
python/paddle/v2/fluid/tests/test_while_op.py
+10
-3
未找到文件。
CMakeLists.txt
浏览文件 @
94e86897
...
@@ -36,8 +36,7 @@ include(simd)
...
@@ -36,8 +36,7 @@ include(simd)
################################ Configurations #######################################
################################ Configurations #######################################
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKLDNN
"Compile PaddlePaddle with mkl-dnn support."
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_MKLML
"Compile PaddlePaddle with mklml package."
${
AVX_FOUND
}
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
ON
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
...
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
"Disable PYTHON when cross-compiling for Android and iOS"
FORCE
)
"Disable PYTHON when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_RDMA OFF CACHE STRING
set
(
WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLDNN OFF CACHE STRING
set
(
WITH_MKL OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android and iOS"
FORCE
)
"Disable MKL when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS"
FORCE
)
# Compile PaddlePaddle mobile inference library
# Compile PaddlePaddle mobile inference library
if
(
NOT WITH_C_API
)
if
(
NOT WITH_C_API
)
...
@@ -111,6 +108,17 @@ else()
...
@@ -111,6 +108,17 @@ else()
set
(
THIRD_PARTY_BUILD_TYPE Release
)
set
(
THIRD_PARTY_BUILD_TYPE Release
)
endif
()
endif
()
if
(
WITH_MKL
)
set
(
WITH_MKLML ON
)
set
(
WITH_MKLDNN
${
AVX2_FOUND
}
)
if
(
NOT WITH_MKLDNN
)
message
(
WARNING
"Do not have AVX2 intrinsics and disabled MKL-DNN"
)
endif
()
else
()
set
(
WITH_MKLML OFF
)
set
(
WITH_MKLDNN OFF
)
endif
()
########################################################################################
########################################################################################
include
(
external/mklml
)
# download mklml package
include
(
external/mklml
)
# download mklml package
...
@@ -161,8 +169,12 @@ if(WITH_GPU)
...
@@ -161,8 +169,12 @@ if(WITH_GPU)
include
(
cuda
)
include
(
cuda
)
endif
(
WITH_GPU
)
endif
(
WITH_GPU
)
if
(
WITH_MKLML
)
list
(
APPEND EXTERNAL_LIBS
${
MKLML_IOMP_LIB
}
)
endif
()
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
${
MKLDNN_IOMP_LIB
}
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
)
endif
()
endif
()
if
(
USE_NNPACK
)
if
(
USE_NNPACK
)
...
...
cmake/configure.cmake
浏览文件 @
94e86897
...
@@ -76,27 +76,14 @@ else()
...
@@ -76,27 +76,14 @@ else()
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
endif
(
NOT WITH_GPU
)
endif
(
NOT WITH_GPU
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLML AND MKLML_IOMP_LIB
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
message
(
STATUS
"Enable Intel OpenMP with
${
MKLML_IOMP_LIB
}
"
)
if
(
WITH_MKLML AND MKLDNN_IOMP_DIR
)
set
(
OPENMP_FLAGS
"-fopenmp"
)
message
(
STATUS
"Enable Intel OpenMP at
${
MKLDNN_IOMP_DIR
}
"
)
set
(
CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
OPENMP_FLAGS
"-fopenmp"
)
set
(
CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENMP_FLAGS
}
"
)
set
(
CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENMP_FLAGS
}
"
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENMP_FLAGS
}
"
)
endif
()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENMP_FLAGS
}
"
)
else
()
find_package
(
OpenMP
)
if
(
OPENMP_FOUND
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OpenMP_C_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OpenMP_CXX_FLAGS
}
"
)
else
()
message
(
WARNING
"Can not find OpenMP."
"Some performance features in MKLDNN may not be available"
)
endif
()
endif
()
endif
(
WITH_MKLDNN
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
SIMD_FLAG
}
"
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
SIMD_FLAG
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
SIMD_FLAG
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
SIMD_FLAG
}
"
)
...
...
cmake/cross_compiling/ios.cmake
浏览文件 @
94e86897
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"armv7;armv7s;arm64"
)
set
(
IOS_ARCH
"arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
# FIXME(liuyiqun): support "i386;x86_64" future
set
(
IOS_ARCH
"i386;x86_64"
)
set
(
IOS_ARCH
"x86_64"
)
endif
()
endif
()
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibilty is required for cxx on iOS
# Hidden visibilty is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility
=hidden -fvisibility
-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
...
...
cmake/cuda.cmake
浏览文件 @
94e86897
...
@@ -63,7 +63,6 @@ function(select_nvcc_arch_flags out_variable)
...
@@ -63,7 +63,6 @@ function(select_nvcc_arch_flags out_variable)
set
(
archs_name_default
"All"
)
set
(
archs_name_default
"All"
)
if
(
NOT CMAKE_CROSSCOMPILING
)
if
(
NOT CMAKE_CROSSCOMPILING
)
list
(
APPEND archs_names
"Auto"
)
list
(
APPEND archs_names
"Auto"
)
set
(
archs_name_default
"Auto"
)
endif
()
endif
()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
...
...
cmake/external/mkldnn.cmake
浏览文件 @
94e86897
...
@@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
...
@@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_MKLROOT
${
MKLML_ROOT
}
)
MESSAGE
(
STATUS
"Build MKLDNN with MKLML
${
MKLML_ROOT
}
"
)
SET
(
MKLDNN_IOMP_LIB
${
MKLML_IOMP_LIB
}
)
ELSE
()
SET
(
MKLDNN_IOMP_DIR
${
MKLML_LIB_DIR
}
)
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
MESSAGE
(
STATUS
"Build MKLDNN with
${
MKLDNN_MKLROOT
}
"
)
ENDIF
()
ENDIF
()
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
-Wno-error=strict-overflow"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
-Wno-error=strict-overflow"
)
...
@@ -57,15 +56,16 @@ ExternalProject_Add(
...
@@ -57,15 +56,16 @@ ExternalProject_Add(
PREFIX
${
MKLDNN_SOURCES_DIR
}
PREFIX
${
MKLDNN_SOURCES_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DMKLROOT=
${
MKL
DNN_MKL
ROOT
}
CMAKE_ARGS -DMKLROOT=
${
MKL
ML_
ROOT
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKL
DNN_MKL
ROOT
}
-DMKLROOT:PATH=
${
MKL
ML_
ROOT
}
)
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
MESSAGE
(
STATUS
"Mkldnn library:
${
MKLDNN_LIB
}
"
)
MESSAGE
(
STATUS
"MKLDNN library:
${
MKLDNN_LIB
}
"
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
LIST
(
APPEND external_project_dependencies mkldnn
)
LIST
(
APPEND external_project_dependencies mkldnn
)
cmake/external/openblas.cmake
浏览文件 @
94e86897
...
@@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND})
...
@@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND})
"
${
CBLAS_INSTALL_DIR
}
/lib/
${
CMAKE_STATIC_LIBRARY_PREFIX
}
openblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
"
"
${
CBLAS_INSTALL_DIR
}
/lib/
${
CMAKE_STATIC_LIBRARY_PREFIX
}
openblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"openblas library."
FORCE
)
CACHE FILEPATH
"openblas library."
FORCE
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
"
)
SET
(
OPENBLAS_CC
"
${
CMAKE_C_COMPILER
}
-Wno-unused-but-set-variable -Wno-unused-variable
"
)
IF
(
CMAKE_CROSSCOMPILING
)
IF
(
CMAKE_CROSSCOMPILING
)
SET
(
OPTIONAL_ARGS HOSTCC=
${
HOST_C_COMPILER
}
)
SET
(
OPTIONAL_ARGS HOSTCC=
${
HOST_C_COMPILER
}
)
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ENDIF
()
ELSEIF
(
IOS
)
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead."
)
ENDIF
()
ENDIF
()
ELSEIF
(
RPI
)
ELSEIF
(
RPI
)
# use hardfp
# use hardfp
...
...
cmake/external/warpctc.cmake
浏览文件 @
94e86897
...
@@ -12,6 +12,10 @@
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
IF
(
MOBILE_INFERENCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
INCLUDE
(
ExternalProject
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
...
...
cmake/util.cmake
浏览文件 @
94e86897
...
@@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
...
@@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
target_link_libraries
(
${
TARGET_NAME
}
log
)
target_link_libraries
(
${
TARGET_NAME
}
log
)
endif
(
ANDROID
)
endif
(
ANDROID
)
if
(
WITH_MKL
DNN AND WITH_MKLML AND MKLDNN_IOMP_DIR
)
if
(
WITH_MKL
ML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
DNN_IOMP
_DIR
}
-liomp5 -Wl,--as-needed"
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
ML_LIB
_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
()
endif
()
add_dependencies
(
${
TARGET_NAME
}
${
external_project_dependencies
}
)
add_dependencies
(
${
TARGET_NAME
}
${
external_project_dependencies
}
)
...
...
doc/design/mkldnn/README.MD
浏览文件 @
94e86897
...
@@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
...
@@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
我们把集成方案大致分为了如下几个方面。
我们把集成方案大致分为了如下几个方面。
### CMake
### CMake
我们会在
`CMakeLists.txt`
中会
添加
`WITH_MKLDNN`
的选项,当设置这个值为
`ON`
的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能
。
我们会在
`CMakeLists.txt`
中会
给用户添加一个
`WITH_MKL`
的开关,他是负责
`WITH_MKLML`
和
`WITH_MKLDNN`
的总开关
。
同时,我们会引入
`WITH_MKLML`
选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性
能。
当打开
`WITH_MKL`
时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功
能。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
当关闭
`WITH_MKL`
时,MKLML和MKL-DNN功能会同时关闭
。
**备注**
:当
`WITH_MKLML=ON`
的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动
`cmake/cblas.cmake`
中的逻辑
。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
### Layers
### Layers
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
...
...
doc/howto/dev/write_docs_cn.rst
浏览文件 @
94e86897
...
@@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。
...
@@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。
cd TO_YOUR_PADDLE_CLONE_PATH
cd TO_YOUR_PADDLE_CLONE_PATH
mkdir -p build
mkdir -p build
cd build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL
DNN=OFF -DWITH_MKLML
=OFF -DWITH_DOC=ON
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make gen_proto_py
make gen_proto_py
make paddle_docs paddle_docs_cn
make paddle_docs paddle_docs_cn
...
...
doc/mobile/cross_compiling_for_android_cn.md
浏览文件 @
94e86897
#
构建Android平台上的PaddlePaddle库
#
Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
-
基于Docker容器的编译方式
-
基于Docker容器的编译方式
...
...
doc/mobile/cross_compiling_for_ios_cn.md
浏览文件 @
94e86897
#
构建iOS平台上的PaddlePaddle库
#
iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
## 准备交叉编译环境
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示:
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示
,默认编译所有架构
:
<table class="docutils">
<table class="docutils">
<colgroup>
<colgroup>
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tbody valign="top">
<tr class="row-even">
<tr class="row-even">
<td>OS</td>
<td>OS</td>
<td>armv7, armv7s, arm64
(默认)
</td>
<td>armv7, armv7s, arm64 </td>
</tr>
</tr>
<tr class="row-odd">
<tr class="row-odd">
<td>SIMULATOR</td>
<td>SIMULATOR</td>
<td>i386, x86_64
(默认)
</td>
<td>i386, x86_64 </td>
</tr>
</tr>
</tbody>
</tbody>
</table>
</table>
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```
bash
```
bash
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_ARCH
=
"arm64"
\
-DIOS_ARCH
=
"arm
v7;arm
64"
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
...
@@ -112,6 +112,6 @@ $ make install
...
@@ -112,6 +112,6 @@ $ make install
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`third_party`
目录,其中包含所依赖的所有第三方库
-
`third_party`
目录,其中包含所依赖的所有第三方库
注意,
不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用
`lipo`
工具将多个静态库合并成一个支持多个架构的
fat库。
注意,
如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用
`lipo`
工具合并
fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
doc/mobile/cross_compiling_for_raspberry_cn.md
浏览文件 @
94e86897
#
构建Raspberry Pi平台上的PaddlePaddle库
#
Raspberry Pi平台编译指南
通常有两个方法来构建基于 Rasspberry Pi 的版本:
通常有两个方法来构建基于 Rasspberry Pi 的版本:
...
...
paddle/cuda/include/hl_gpu.h
浏览文件 @
94e86897
...
@@ -25,7 +25,9 @@ limitations under the License. */
...
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
#include "stub/hl_aggregate_stub.h"
...
...
paddle/framework/backward.cc
浏览文件 @
94e86897
...
@@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector<std::string>& names,
...
@@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector<std::string>& names,
return
false
;
return
false
;
}
}
}
}
if
(
VLOG_IS_ON
(
10
))
{
std
::
ostringstream
sout
;
sout
<<
"All input {"
;
for
(
auto
&
name
:
names
)
{
sout
<<
name
<<
","
;
}
sout
<<
"} is in {"
;
for
(
auto
&
name
:
set
)
{
sout
<<
name
<<
","
;
}
sout
<<
"}"
;
VLOG
(
10
)
<<
sout
.
str
();
}
return
true
;
return
true
;
}
}
...
@@ -290,14 +303,12 @@ static void CreateGradVarInBlock(
...
@@ -290,14 +303,12 @@ static void CreateGradVarInBlock(
auto
ops
=
block_desc
->
AllOps
();
auto
ops
=
block_desc
->
AllOps
();
for
(
size_t
op_index
=
grad_op_start_index
;
op_index
<
ops
.
size
();
for
(
size_t
op_index
=
grad_op_start_index
;
op_index
<
ops
.
size
();
++
op_index
)
{
++
op_index
)
{
bool
need_infer_shape
=
false
;
std
::
unordered_set
<
std
::
string
>
new_vars
;
std
::
unordered_set
<
std
::
string
>
new_vars
;
ForEachVarName
(
ops
[
op_index
]
->
Outputs
(),
ForEachVarName
(
ops
[
op_index
]
->
Outputs
(),
[
&
](
const
std
::
string
&
grad_var_name
)
{
[
&
](
const
std
::
string
&
grad_var_name
)
{
if
(
block_desc
->
HasVar
(
grad_var_name
))
{
if
(
block_desc
->
HasVar
(
grad_var_name
))
{
return
false
;
return
false
;
}
}
need_infer_shape
=
true
;
auto
var
=
block_desc
->
Var
(
grad_var_name
);
auto
var
=
block_desc
->
Var
(
grad_var_name
);
new_vars
.
insert
(
var
->
Name
());
new_vars
.
insert
(
var
->
Name
());
auto
it
=
param_name_map
.
find
(
grad_var_name
);
auto
it
=
param_name_map
.
find
(
grad_var_name
);
...
@@ -311,23 +322,21 @@ static void CreateGradVarInBlock(
...
@@ -311,23 +322,21 @@ static void CreateGradVarInBlock(
grad_record
.
op_idx_
=
static_cast
<
int
>
(
op_index
);
grad_record
.
op_idx_
=
static_cast
<
int
>
(
op_index
);
return
false
;
/* not break */
return
false
;
/* not break */
});
});
if
(
need_infer_shape
)
{
ops
[
op_index
]
->
InferVarType
(
block_desc
);
ops
[
op_index
]
->
InferVarType
(
block_desc
);
for
(
auto
&
arg
:
ops
[
op_index
]
->
OutputArgumentNames
())
{
for
(
auto
&
arg
:
ops
[
op_index
]
->
OutputArgumentNames
())
{
if
(
new_vars
.
find
(
arg
)
==
new_vars
.
end
())
{
if
(
new_vars
.
find
(
arg
)
==
new_vars
.
end
())
{
continue
;
continue
;
}
}
auto
pname
=
FwdName
(
arg
);
auto
pname
=
FwdName
(
arg
);
auto
*
param
=
block_desc
->
FindVarRecursive
(
pname
);
auto
*
param
=
block_desc
->
FindVarRecursive
(
pname
);
auto
*
grad
=
block_desc
->
FindVar
(
arg
);
auto
*
grad
=
block_desc
->
FindVar
(
arg
);
if
(
param
==
nullptr
)
{
if
(
param
==
nullptr
)
{
grad
->
SetDataType
(
DataType
::
FP32
);
grad
->
SetDataType
(
DataType
::
FP32
);
}
else
{
}
else
{
grad
->
SetDataType
(
param
->
GetDataType
());
grad
->
SetDataType
(
param
->
GetDataType
());
}
}
}
ops
[
op_index
]
->
InferShape
(
*
block_desc
);
}
}
ops
[
op_index
]
->
InferShape
(
*
block_desc
);
}
}
}
}
...
@@ -387,6 +396,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -387,6 +396,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind
&
program_desc
,
int
block_idx
,
ProgramDescBind
&
program_desc
,
int
block_idx
,
std
::
unordered_set
<
std
::
string
>*
no_grad_vars
,
std
::
unordered_set
<
std
::
string
>*
no_grad_vars
,
std
::
unordered_map
<
std
::
string
,
std
::
string
>*
grad_to_var
)
{
std
::
unordered_map
<
std
::
string
,
std
::
string
>*
grad_to_var
)
{
VLOG
(
5
)
<<
"MakeBlockBackward"
;
BlockDescBind
*
cur_block
=
program_desc
.
MutableBlock
(
block_idx
);
BlockDescBind
*
cur_block
=
program_desc
.
MutableBlock
(
block_idx
);
std
::
vector
<
OpDescBind
*>
op_descs
=
cur_block
->
AllOps
();
std
::
vector
<
OpDescBind
*>
op_descs
=
cur_block
->
AllOps
();
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
size_t
>>
dup_out_ops
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
size_t
>>
dup_out_ops
;
...
@@ -394,9 +404,10 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -394,9 +404,10 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
backward_descs
;
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
backward_descs
;
for
(
auto
it
=
op_descs
.
rbegin
();
it
!=
op_descs
.
rend
();
++
it
)
{
for
(
auto
it
=
op_descs
.
rbegin
();
it
!=
op_descs
.
rend
();
++
it
)
{
VLOG
(
5
)
<<
"Making backward "
<<
(
*
it
)
->
Type
()
<<
" op"
;
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
op_grads
;
std
::
vector
<
std
::
unique_ptr
<
OpDescBind
>>
op_grads
;
if
((
*
it
)
->
Type
()
==
"recurrent"
)
{
if
((
*
it
)
->
Type
()
==
"recurrent"
||
(
*
it
)
->
Type
()
==
"while"
)
{
int
step_block_idx
=
(
*
it
)
->
GetBlockAttr
(
"step_block"
);
int
step_block_idx
=
(
*
it
)
->
GetBlockAttr
(
"step_block"
);
BlockDescBind
*
backward_block
=
CreateStepBlock
(
BlockDescBind
*
backward_block
=
CreateStepBlock
(
program_desc
,
no_grad_vars
,
grad_to_var
,
step_block_idx
);
program_desc
,
no_grad_vars
,
grad_to_var
,
step_block_idx
);
...
@@ -410,6 +421,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -410,6 +421,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
op_grads
=
MakeOpGrad
(
*
it
,
no_grad_vars
,
grad_to_var
);
op_grads
=
MakeOpGrad
(
*
it
,
no_grad_vars
,
grad_to_var
);
}
}
if
(
VLOG_IS_ON
(
10
))
{
std
::
ostringstream
sout
;
sout
<<
"Made "
;
for
(
auto
&
op_grad
:
op_grads
)
{
sout
<<
op_grad
->
Type
()
<<
" "
;
}
VLOG
(
10
)
<<
sout
.
str
();
}
for
(
const
auto
&
desc
:
op_grads
)
{
for
(
const
auto
&
desc
:
op_grads
)
{
for
(
const
std
::
string
&
out_name
:
desc
->
OutputArgumentNames
())
{
for
(
const
std
::
string
&
out_name
:
desc
->
OutputArgumentNames
())
{
if
(
out_name
.
find
(
"@GRAD"
)
==
std
::
string
::
npos
)
{
if
(
out_name
.
find
(
"@GRAD"
)
==
std
::
string
::
npos
)
{
...
@@ -425,6 +445,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -425,6 +445,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
op_grads
.
begin
(),
op_grads
.
end
(),
std
::
back_inserter
(
backward_descs
),
op_grads
.
begin
(),
op_grads
.
end
(),
std
::
back_inserter
(
backward_descs
),
[](
std
::
unique_ptr
<
OpDescBind
>&
ptr
)
{
return
std
::
move
(
ptr
);
});
[](
std
::
unique_ptr
<
OpDescBind
>&
ptr
)
{
return
std
::
move
(
ptr
);
});
}
}
VLOG
(
5
)
<<
"Appending Sums"
;
// Check whether some variables are written more than once
// Check whether some variables are written more than once
std
::
list
<
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>>
pending_sum_ops
;
std
::
list
<
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>>
pending_sum_ops
;
for
(
const
auto
&
dup
:
dup_out_ops
)
{
for
(
const
auto
&
dup
:
dup_out_ops
)
{
...
@@ -432,16 +454,22 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -432,16 +454,22 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
const
std
::
vector
<
size_t
>
dup_op
=
dup
.
second
;
const
std
::
vector
<
size_t
>
dup_op
=
dup
.
second
;
if
(
out_name
!=
kEmptyVarName
&&
dup_op
.
size
()
>
1
)
{
if
(
out_name
!=
kEmptyVarName
&&
dup_op
.
size
()
>
1
)
{
std
::
vector
<
std
::
string
>
sum_op_inputs
;
std
::
vector
<
std
::
string
>
sum_op_inputs
;
std
::
string
next_g_name
=
out_name
;
for
(
size_t
i
=
0
;
i
<
dup_op
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
dup_op
.
size
();
++
i
)
{
VLOG
(
10
)
<<
backward_descs
[
dup_op
[
i
]]
->
Type
()
<<
" has "
<<
out_name
<<
" duplicated"
;
std
::
string
new_name
=
out_name
+
"@RENAME@"
+
std
::
to_string
(
i
);
std
::
string
new_name
=
out_name
+
"@RENAME@"
+
std
::
to_string
(
i
);
backward_descs
[
dup_op
[
i
]]
->
Rename
(
out_name
,
new_name
);
backward_descs
[
dup_op
[
i
]]
->
RenameOutput
(
out_name
,
new_name
);
backward_descs
[
dup_op
[
i
]]
->
RenameInput
(
out_name
,
next_g_name
);
sum_op_inputs
.
emplace_back
(
new_name
);
sum_op_inputs
.
emplace_back
(
new_name
);
next_g_name
=
sum_op_inputs
.
back
();
}
}
std
::
unique_ptr
<
OpDescBind
>
sum_op
(
new
OpDescBind
(
std
::
unique_ptr
<
OpDescBind
>
sum_op
(
new
OpDescBind
(
"sum"
,
{{
"X"
,
sum_op_inputs
}},
{{
"Out"
,
{
out_name
}}},
{}));
"sum"
,
{{
"X"
,
sum_op_inputs
}},
{{
"Out"
,
{
out_name
}}},
{}));
pending_sum_ops
.
push_back
({
dup_op
.
back
(),
std
::
move
(
sum_op
)});
pending_sum_ops
.
push_back
({
dup_op
.
back
(),
std
::
move
(
sum_op
)});
}
}
}
}
pending_sum_ops
.
sort
(
pending_sum_ops
.
sort
(
[](
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
a
,
[](
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
a
,
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
b
)
{
const
std
::
pair
<
size_t
,
std
::
unique_ptr
<
OpDescBind
>>&
b
)
{
...
@@ -452,6 +480,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
...
@@ -452,6 +480,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std
::
move
(
p
.
second
));
std
::
move
(
p
.
second
));
}
}
VLOG
(
5
)
<<
"MakeBlockBackward Finished"
;
return
backward_descs
;
return
backward_descs
;
}
}
...
...
paddle/framework/data_type.h
浏览文件 @
94e86897
...
@@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) {
...
@@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) {
return
DataType
::
INT32
;
return
DataType
::
INT32
;
}
else
if
(
typeid
(
int64_t
).
hash_code
()
==
type
.
hash_code
())
{
}
else
if
(
typeid
(
int64_t
).
hash_code
()
==
type
.
hash_code
())
{
return
DataType
::
INT64
;
return
DataType
::
INT64
;
}
else
if
(
typeid
(
bool
).
hash_code
()
==
type
.
hash_code
())
{
return
DataType
::
BOOL
;
}
else
{
}
else
{
PADDLE_THROW
(
"Not supported"
);
PADDLE_THROW
(
"Not supported"
);
}
}
...
...
paddle/framework/ddim.cc
浏览文件 @
94e86897
...
@@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) {
...
@@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) {
ddim
=
make_dim
<
9
>
(
dims
);
ddim
=
make_dim
<
9
>
(
dims
);
break
;
break
;
default:
default:
throw
std
::
invalid_argument
(
PADDLE_THROW
(
"Dynamic dimensions must have between [1, 9] dimensions."
);
"Dynamic dimensions must have between [1, 9] dimensions."
);
}
}
}
}
...
...
paddle/framework/executor.cc
浏览文件 @
94e86897
...
@@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
...
@@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
);
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
);
VLOG
(
10
)
<<
op
->
DebugString
();
op
->
Run
(
*
local_scope
,
*
device
);
op
->
Run
(
*
local_scope
,
*
device
);
}
}
if
(
create_local_scope
)
{
if
(
create_local_scope
)
{
...
...
paddle/framework/op_desc.cc
浏览文件 @
94e86897
...
@@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name,
...
@@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name,
need_update_
=
true
;
need_update_
=
true
;
}
}
void
OpDescBind
::
RenameOutput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
)
{
for
(
auto
&
output
:
outputs_
)
{
std
::
replace
(
output
.
second
.
begin
(),
output
.
second
.
end
(),
old_name
,
new_name
);
}
need_update_
=
true
;
}
void
OpDescBind
::
RenameInput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
)
{
for
(
auto
&
input
:
inputs_
)
{
std
::
replace
(
input
.
second
.
begin
(),
input
.
second
.
end
(),
old_name
,
new_name
);
}
need_update_
=
true
;
}
struct
SetAttrDescVisitor
:
public
boost
::
static_visitor
<
void
>
{
struct
SetAttrDescVisitor
:
public
boost
::
static_visitor
<
void
>
{
explicit
SetAttrDescVisitor
(
OpDesc
::
Attr
*
attr
)
:
attr_
(
attr
)
{}
explicit
SetAttrDescVisitor
(
OpDesc
::
Attr
*
attr
)
:
attr_
(
attr
)
{}
mutable
OpDesc
::
Attr
*
attr_
;
mutable
OpDesc
::
Attr
*
attr_
;
...
@@ -448,7 +465,12 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
...
@@ -448,7 +465,12 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
return
framework
::
make_ddim
(
var
->
Shape
());
try
{
return
framework
::
make_ddim
(
var
->
Shape
());
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
}
}
void
CompileTimeInferShapeContext
::
SetDim
(
const
std
::
string
&
name
,
void
CompileTimeInferShapeContext
::
SetDim
(
const
std
::
string
&
name
,
...
...
paddle/framework/op_desc.h
浏览文件 @
94e86897
...
@@ -73,6 +73,10 @@ class OpDescBind {
...
@@ -73,6 +73,10 @@ class OpDescBind {
void
Rename
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
void
Rename
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
void
RenameOutput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
void
RenameInput
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
// Only be used in C++
// Only be used in C++
const
AttributeMap
&
GetAttrMap
()
const
;
const
AttributeMap
&
GetAttrMap
()
const
;
...
...
paddle/framework/operator.cc
浏览文件 @
94e86897
...
@@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
OperatorWithKernel
::
Run
(
const
Scope
&
scope
,
void
OperatorWithKernel
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
if
(
VLOG_IS_ON
(
1
))
{
auto
inputs
=
this
->
InputVars
();
auto
outputs
=
this
->
OutputVars
(
true
);
std
::
ostringstream
sout
;
sout
<<
"Run operator "
<<
this
->
Type
()
<<
" From ["
;
std
::
ostream_iterator
<
std
::
string
>
out_it
(
sout
,
","
);
std
::
copy
(
inputs
.
begin
(),
inputs
.
end
(),
out_it
);
sout
<<
"] to ["
;
std
::
copy
(
outputs
.
begin
(),
outputs
.
end
(),
out_it
);
sout
<<
"]"
;
VLOG
(
1
)
<<
sout
.
str
();
}
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
...
...
paddle/framework/scope.cc
浏览文件 @
94e86897
...
@@ -38,11 +38,12 @@ Scope& Scope::NewScope() const {
...
@@ -38,11 +38,12 @@ Scope& Scope::NewScope() const {
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
auto
iter
=
vars_
.
find
(
name
);
auto
iter
=
vars_
.
find
(
name
);
if
(
iter
!=
vars_
.
end
())
{
if
(
iter
!=
vars_
.
end
())
{
VLOG
(
3
)
<<
"Get existing variable "
<<
name
;
return
iter
->
second
;
return
iter
->
second
;
}
}
Variable
*
v
=
new
Variable
();
Variable
*
v
=
new
Variable
();
vars_
[
name
]
=
v
;
vars_
[
name
]
=
v
;
VLOG
(
3
)
<<
"Create variable "
<<
name
<<
" on scope"
;
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
return
v
;
}
}
...
...
paddle/framework/shape_inference.h
浏览文件 @
94e86897
...
@@ -53,6 +53,10 @@ class InferShapeContext {
...
@@ -53,6 +53,10 @@ class InferShapeContext {
virtual
bool
IsRuntime
()
const
=
0
;
virtual
bool
IsRuntime
()
const
=
0
;
// Note: In while op, we need this to be public
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
framework
::
DDim
>
&
dims
);
protected:
protected:
virtual
framework
::
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
framework
::
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
framework
::
DDim
&
dim
)
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
framework
::
DDim
&
dim
)
=
0
;
...
@@ -60,9 +64,6 @@ class InferShapeContext {
...
@@ -60,9 +64,6 @@ class InferShapeContext {
std
::
vector
<
framework
::
DDim
>
GetDims
(
std
::
vector
<
framework
::
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
framework
::
DDim
>
&
dims
);
std
::
vector
<
VarDesc
::
VarType
>
GetVarTypes
(
std
::
vector
<
VarDesc
::
VarType
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
...
...
paddle/gserver/layers/MKLDNNLayer.cpp
浏览文件 @
94e86897
...
@@ -22,7 +22,7 @@ namespace paddle {
...
@@ -22,7 +22,7 @@ namespace paddle {
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKL
DNN
=ON "
<<
"Please set WITH_MKL=ON "
<<
"and set use_mkldnn=True"
;
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
...
...
paddle/math/Storage.cpp
浏览文件 @
94e86897
...
@@ -17,9 +17,13 @@ limitations under the License. */
...
@@ -17,9 +17,13 @@ limitations under the License. */
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Util.h"
#ifndef PADDLE_MOBILE_INFERENCE
DEFINE_int32
(
pool_limit_size
,
DEFINE_int32
(
pool_limit_size
,
536870912
,
536870912
,
"maximum memory size managed by a memory pool, default is 512M"
);
"maximum memory size managed by a memory pool, default is 512M"
);
#else
DEFINE_int32
(
pool_limit_size
,
0
,
"default is 0"
);
#endif
namespace
paddle
{
namespace
paddle
{
...
...
paddle/operators/array_operator.h
浏览文件 @
94e86897
...
@@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase {
...
@@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase {
}
else
{
}
else
{
offset
=
static_cast
<
size_t
>
(
*
i_tensor
.
data
<
int64_t
>
());
offset
=
static_cast
<
size_t
>
(
*
i_tensor
.
data
<
int64_t
>
());
}
}
VLOG
(
10
)
<<
" Offset = "
<<
offset
;
return
offset
;
return
offset
;
}
}
};
};
...
...
paddle/operators/bilinear_tensor_product_op.h
浏览文件 @
94e86897
...
@@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
...
@@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
// Calculate the gradient of Input(Bias).
// Calculate the gradient of Input(Bias).
if
(
d_bias
)
{
if
(
d_bias
)
{
d_bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
d_bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
d_bias_mat
=
EigenMatrix
<
T
>::
From
(
*
d_bias
);
auto
d_bias_mat
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_bias
);
d_bias_mat
.
device
(
place
)
=
d_out_mat
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
d_bias_mat
.
device
(
place
)
=
d_out_mat
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
}
}
}
}
...
...
paddle/operators/conv_transpose_op.cc
浏览文件 @
94e86897
...
@@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
for
(
size_t
i
=
0
;
i
<
paddings
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
paddings
[
i
],
0
,
"No Padding allowed in conv transpose op."
);
}
PADDLE_ENFORCE
(
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
,
PADDLE_ENFORCE
(
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
,
"ConvTransposeOp intput should be 4-D or 5-D tensor."
);
"ConvTransposeOp intput should be 4-D or 5-D tensor."
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
...
@@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
-
2
*
paddings
[
i
]
+
filter_dims
[
i
+
2
]);
filter_dims
[
i
+
2
]);
}
}
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
...
...
paddle/operators/conv_transpose_op.h
浏览文件 @
94e86897
...
@@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
// TODO(Zhuoyuan): Paddings can be added in future.
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will always be disabled in conv2dtranspose.
// groups will always be disabled in conv2dtranspose.
...
@@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
paddings
,
std
::
vector
<
int
>
{
0
,
0
,
0
},
&
output_batch
);
&
output_batch
);
}
}
}
}
}
}
...
@@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if
((
!
input_grad
)
&&
(
!
filter_grad
))
return
;
if
((
!
input_grad
)
&&
(
!
filter_grad
))
return
;
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
paddle/operators/cos_sim_op.h
浏览文件 @
94e86897
...
@@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel<T> {
...
@@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel<T> {
// compute dy
// compute dy
if
(
out_grad_y
)
{
if
(
out_grad_y
)
{
out_grad_y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
out_grad_y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dy
=
Eigen
Matrix
<
T
>::
Reshape
(
*
out_grad_y
,
1
);
auto
dy
=
Eigen
Vector
<
T
>::
Flatten
(
*
out_grad_y
);
auto
grad
=
x
/
norm_prod_bcast
-
z_bcast
*
y_bcast
/
y_snorm_bcast
;
auto
grad
=
x
/
norm_prod_bcast
-
z_bcast
*
y_bcast
/
y_snorm_bcast
;
dy
.
device
(
place
)
=
(
dz_bcast
*
grad
).
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}));
dy
.
device
(
place
)
=
(
dz_bcast
*
grad
).
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}));
}
}
...
...
paddle/operators/detail/safe_ref.h
0 → 100644
浏览文件 @
94e86897
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace
paddle
{
namespace
operators
{
namespace
detail
{
/**
 * Dereference `ptr` after checking that it is not null.
 *
 * The null check is performed with PADDLE_ENFORCE. `args` are handed to it
 * unchanged and carry the printf-style error message and its format
 * arguments.
 */
template <typename T, typename... ARGS>
inline T& Ref(T* ptr, ARGS&&... args) {
  PADDLE_ENFORCE(ptr != nullptr, args...);
  T& target = *ptr;
  return target;
}
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/fill_constant_batch_size_like_op.cc
浏览文件 @
94e86897
...
@@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like,
...
@@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like,
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
fill_constant_batch_size_like
,
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
int
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUPlace
,
int64_t
>
);
paddle/operators/fill_constant_batch_size_like_op.cu.cc
浏览文件 @
94e86897
...
@@ -19,4 +19,7 @@ namespace ops = paddle::operators;
...
@@ -19,4 +19,7 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
fill_constant_batch_size_like
,
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
int
>
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
GPUPlace
,
int64_t
>
);
paddle/operators/fill_zeros_like_op.cc
浏览文件 @
94e86897
...
@@ -54,5 +54,8 @@ namespace ops = paddle::operators;
...
@@ -54,5 +54,8 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
ops
::
FillZerosLikeOpMaker
);
ops
::
FillZerosLikeOpMaker
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like
,
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUPlace
,
bool
>
);
paddle/operators/fill_zeros_like_op.cu.cc
浏览文件 @
94e86897
...
@@ -17,5 +17,8 @@
...
@@ -17,5 +17,8 @@
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
fill_zeros_like
,
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
bool
>
);
paddle/operators/is_empty_op.cc
0 → 100644
浏览文件 @
94e86897
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace
paddle
{
namespace
operators
{
constexpr
char
kInput
[]
=
"X"
;
constexpr
char
kOutput
[]
=
"Out"
;
// Operator that writes a single bool into its "Out" variable telling whether
// the LoDTensor held by its "X" variable contains zero elements.
class IsEmptyOp : public framework::OperatorBase {
 public:
  IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs,
            const framework::VariableNameMap &outputs,
            const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  // Looks up the input and output variables in `scope`, resizes the output
  // tensor to one element and stores `product(dims) == 0` in it. The result
  // buffer is always requested on CPUPlace; `dev_ctx` is not used.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    // Input: must exist in the scope and hold a LoDTensor.
    auto *in_var = scope.FindVar(this->Input(kInput));
    PADDLE_ENFORCE_NOT_NULL(in_var);
    const auto &x = in_var->Get<framework::LoDTensor>();

    // Output: must exist in the scope; its tensor is created on demand.
    auto *out_var = scope.FindVar(this->Output(kOutput));
    PADDLE_ENFORCE_NOT_NULL(out_var);
    auto *result = out_var->GetMutable<framework::LoDTensor>();

    // Keep the emptiness test after the resize, exactly as the statement
    // order was before, so nothing changes if X and Out name one variable.
    result->Resize({1});
    bool *flag = result->mutable_data<bool>(platform::CPUPlace());
    flag[0] = (framework::product(x.dims()) == 0);
  }
};
// Declares the proto of the is_empty operator: one input (kInput), one
// output (kOutput) and the operator-level documentation string.
class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  IsEmptyOpProtoMaker(framework::OpProto *proto,
                      framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(kInput, "(Tensor) Tensor which is to be checked.");
    AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not.");
    // The documented predicate must match IsEmptyOp::Run, which stores
    // `framework::product(tensor.dims()) == 0` (true when the tensor has no
    // elements). The previous text said `product(tensor.ddims()) > 0`.
    AddComment(R"DOC(
IsEmpty Operator which checks whether a tensor is empty.
It will just return product(tensor.dims()) == 0;
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_WITHOUT_GRADIENT
(
is_empty
,
paddle
::
operators
::
IsEmptyOp
,
paddle
::
operators
::
IsEmptyOpProtoMaker
);
paddle/operators/math/CMakeLists.txt
浏览文件 @
94e86897
add_subdirectory
(
detail
)
add_subdirectory
(
detail
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
framework_proto
)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor
)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor
)
nv_library
(
selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function
)
nv_library
(
selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor
)
...
@@ -15,7 +15,7 @@ if(WITH_GPU)
...
@@ -15,7 +15,7 @@ if(WITH_GPU)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function
)
nv_library
(
gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function
)
else
()
else
()
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
)
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
framework_proto
)
cc_library
(
selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function
)
cc_library
(
selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function
)
cc_library
(
softmax SRCS softmax.cc DEPS device_context
)
cc_library
(
softmax SRCS softmax.cc DEPS device_context
)
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
...
...
paddle/operators/math/im2col.cu
浏览文件 @
94e86897
...
@@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width,
...
@@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width,
if
(
index
<
n
)
{
if
(
index
<
n
)
{
T
val
=
0
;
T
val
=
0
;
int
w
=
index
%
im_width
;
int
w
=
index
%
im_width
+
padding_width
;
int
h
=
(
index
/
im_width
)
%
im_height
;
int
h
=
(
index
/
im_width
)
%
im_height
+
padding_height
;
int
c
=
index
/
(
im_width
*
im_height
);
int
c
=
index
/
(
im_width
*
im_height
);
// compute the start and end of the output
// compute the start and end of the output
...
...
paddle/operators/math/math_function.cc
浏览文件 @
94e86897
...
@@ -250,6 +250,8 @@ void axpy<platform::CPUPlace, double>(const platform::DeviceContext& context,
...
@@ -250,6 +250,8 @@ void axpy<platform::CPUPlace, double>(const platform::DeviceContext& context,
template
struct
SetConstant
<
platform
::
CPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
int64_t
>;
template
struct
SetConstant
<
platform
::
CPUPlace
,
bool
>;
#define DEFINE_CPU_TRANS(RANK) \
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUPlace, float, RANK>; \
template struct Transpose<platform::CPUPlace, float, RANK>; \
...
...
paddle/operators/math/math_function.cu
浏览文件 @
94e86897
...
@@ -256,6 +256,8 @@ void axpy<platform::GPUPlace, double>(const platform::DeviceContext& context,
...
@@ -256,6 +256,8 @@ void axpy<platform::GPUPlace, double>(const platform::DeviceContext& context,
template
struct
SetConstant
<
platform
::
GPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
float
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
double
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
int
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
int64_t
>;
template
struct
SetConstant
<
platform
::
GPUPlace
,
bool
>;
#define DEFINE_GPU_TRANS(RANK) \
#define DEFINE_GPU_TRANS(RANK) \
template struct Transpose<platform::GPUPlace, float, RANK>; \
template struct Transpose<platform::GPUPlace, float, RANK>; \
...
...
paddle/operators/sum_op.cc
浏览文件 @
94e86897
...
@@ -12,6 +12,7 @@ limitations under the License. */
...
@@ -12,6 +12,7 @@ limitations under the License. */
#include "paddle/operators/sum_op.h"
#include "paddle/operators/sum_op.h"
#include <vector>
#include <vector>
#include "paddle/framework/var_type_inference.h"
#include "paddle/framework/var_type_inference.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -59,13 +60,16 @@ class SumOp : public framework::OperatorWithKernel {
...
@@ -59,13 +60,16 @@ class SumOp : public framework::OperatorWithKernel {
x_vars
[
0
]
->
Get
<
framework
::
SelectedRows
>
().
value
().
type
()),
x_vars
[
0
]
->
Get
<
framework
::
SelectedRows
>
().
value
().
type
()),
ctx
.
device_context
());
ctx
.
device_context
());
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
auto
&
array
=
x_vars
[
0
]
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
auto
&
x_var
:
x_vars
)
{
for
(
auto
&
each
:
array
)
{
auto
&
array
=
x_var
->
Get
<
framework
::
LoDTensorArray
>
();
if
(
each
.
numel
()
!=
0
)
{
for
(
auto
&
each
:
array
)
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
each
.
type
()),
if
(
each
.
numel
()
!=
0
)
{
ctx
.
device_context
());
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
each
.
type
()),
ctx
.
device_context
());
}
}
}
}
}
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
}
}
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
x_vars
[
0
]
->
Type
().
name
());
x_vars
[
0
]
->
Type
().
name
());
...
@@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
...
@@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
auto
&
inputs
=
op_desc
.
Input
(
"X"
);
auto
&
inputs
=
op_desc
.
Input
(
"X"
);
auto
var_type
=
framework
::
VarDesc
::
SELECTED_ROWS
;
auto
var_type
=
framework
::
VarDesc
::
SELECTED_ROWS
;
for
(
auto
&
name
:
op_desc
.
Input
(
"X"
))
{
VLOG
(
10
)
<<
name
<<
" "
<<
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
();
}
bool
any_input_is_lod_tensor
=
std
::
any_of
(
bool
any_input_is_lod_tensor
=
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
block
](
const
std
::
string
&
name
)
{
inputs
.
begin
(),
inputs
.
end
(),
[
block
](
const
std
::
string
&
name
)
{
return
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
()
==
return
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
()
==
...
@@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
...
@@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
});
});
auto
is_tensor_array
=
[
block
](
const
std
::
string
&
name
)
{
auto
is_tensor_array
=
[
block
](
const
std
::
string
&
name
)
{
return
block
->
FindRecursiveOrCreateVar
(
name
)
->
GetType
()
==
return
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
name
)).
GetType
()
==
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
};
};
...
@@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
...
@@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
is_tensor_array
);
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
is_tensor_array
);
if
(
any_input_is_tensor_array
)
{
if
(
any_input_is_tensor_array
)
{
PADDLE_ENFORCE
(
all_inputs_are_tensor_array
);
if
(
!
all_inputs_are_tensor_array
)
{
std
::
ostringstream
os
;
for
(
auto
&
each
:
inputs
)
{
os
<<
" "
<<
each
<<
" type is "
<<
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
each
)).
GetType
()
<<
"
\n
"
;
}
PADDLE_ENFORCE
(
all_inputs_are_tensor_array
,
"Not all inputs are tensor array:
\n
%s"
,
os
.
str
());
}
var_type
=
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
var_type
=
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
;
}
else
if
(
any_input_is_lod_tensor
)
{
}
else
if
(
any_input_is_lod_tensor
)
{
var_type
=
framework
::
VarDesc
::
LOD_TENSOR
;
var_type
=
framework
::
VarDesc
::
LOD_TENSOR
;
}
}
auto
out_var_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
out_var_name
=
op_desc
.
Output
(
"Out"
).
front
();
block
->
FindRecursiveOrCreateVar
(
out_var_name
)
->
SetType
(
var_type
);
auto
&
out_var
=
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
out_var_name
));
out_var
.
SetType
(
var_type
);
auto
&
in_var
=
detail
::
Ref
(
block
->
FindVarRecursive
(
inputs
.
front
()));
out_var
.
SetDataType
(
in_var
.
GetDataType
());
}
}
};
};
...
...
paddle/operators/tensor_array_read_write_op.cc
浏览文件 @
94e86897
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/operators/array_operator.h"
#include "paddle/operators/array_operator.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp {
...
@@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp {
auto
*
out
=
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensorArray
>
();
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensorArray
>
();
if
(
offset
>=
out
->
size
())
{
if
(
offset
>=
out
->
size
())
{
VLOG
(
10
)
<<
"Resize "
<<
Output
(
"Out"
)
<<
" from "
<<
out
->
size
()
<<
" to "
<<
offset
+
1
;
out
->
resize
(
offset
+
1
);
out
->
resize
(
offset
+
1
);
}
}
auto
*
out_tensor
=
&
out
->
at
(
offset
);
auto
*
out_tensor
=
&
out
->
at
(
offset
);
...
@@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
...
@@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
public:
public:
void
operator
()(
const
framework
::
OpDescBind
&
op_desc
,
void
operator
()(
const
framework
::
OpDescBind
&
op_desc
,
framework
::
BlockDescBind
*
block
)
const
override
{
framework
::
BlockDescBind
*
block
)
const
override
{
for
(
auto
&
out_var
:
op_desc
.
OutputArgumentNames
())
{
auto
x_name
=
op_desc
.
Input
(
"X"
)[
0
];
VLOG
(
10
)
<<
"Set Variable "
<<
out_var
<<
" as LOD_TENSOR_ARRAY"
;
auto
out_name
=
op_desc
.
Output
(
"Out"
)[
0
];
block
->
FindRecursiveOrCreateVar
(
out_var
)
->
SetType
(
VLOG
(
10
)
<<
"Set Variable "
<<
out_name
<<
" as LOD_TENSOR_ARRAY"
;
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
);
auto
&
out
=
detail
::
Ref
(
block
->
FindRecursiveOrCreateVar
(
out_name
),
}
"Cannot found %s"
,
out_name
);
out
.
SetType
(
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
);
auto
&
x
=
detail
::
Ref
(
block
->
FindVarRecursive
(
x_name
),
"Cannot found %s"
,
x_name
);
out
.
SetDataType
(
x
.
GetDataType
());
}
}
};
};
...
@@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp {
...
@@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp {
auto
&
x_array
=
x
->
Get
<
framework
::
LoDTensorArray
>
();
auto
&
x_array
=
x
->
Get
<
framework
::
LoDTensorArray
>
();
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
));
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
));
PADDLE_ENFORCE
(
out
!=
nullptr
,
"Out must be set"
);
PADDLE_ENFORCE
(
out
!=
nullptr
,
"Out must be set"
);
auto
*
out_te
sn
or
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
out_te
ns
or
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
size_t
offset
=
GetOffset
(
scope
,
dev_ctx
);
size_t
offset
=
GetOffset
(
scope
,
dev_ctx
);
PADDLE_ENFORCE_LT
(
offset
,
x_array
.
size
());
PADDLE_ENFORCE_LT
(
offset
,
x_array
.
size
());
out_te
sn
or
->
CopyFrom
(
x_array
[
offset
],
dev_ctx
.
GetPlace
(),
dev_ctx
);
out_te
ns
or
->
CopyFrom
(
x_array
[
offset
],
dev_ctx
.
GetPlace
(),
dev_ctx
);
out_te
sn
or
->
set_lod
(
x_array
[
offset
].
lod
());
out_te
ns
or
->
set_lod
(
x_array
[
offset
].
lod
());
}
}
};
};
...
...
paddle/operators/while_op.cc
浏览文件 @
94e86897
...
@@ -14,8 +14,10 @@
...
@@ -14,8 +14,10 @@
#include <vector>
#include <vector>
#include "paddle/framework/executor.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/operator.h"
#include "paddle/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor;
...
@@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor;
constexpr
char
kStepBlock
[]
=
"step_block"
;
constexpr
char
kStepBlock
[]
=
"step_block"
;
constexpr
char
kCondition
[]
=
"Condition"
;
constexpr
char
kCondition
[]
=
"Condition"
;
constexpr
char
kStepScopes
[]
=
"StepScopes"
;
constexpr
char
kStepScopes
[]
=
"StepScopes"
;
constexpr
char
kParamGrads
[]
=
"X@Grad"
;
constexpr
char
kParameters
[]
=
"X"
;
constexpr
char
kParameters
[]
=
"X"
;
constexpr
char
kParamGrads
[]
=
"X@GRAD"
;
constexpr
char
kOutputs
[]
=
"Out"
;
class
WhileOp
:
public
framework
::
OperatorBase
{
class
WhileOp
:
public
framework
::
OperatorBase
{
public:
public:
...
@@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
kCondition
,
kCondition
,
"(Bool) An scalar. When it's False, the While Op will be terminated."
)
"(Bool) An scalar. When it's False, the While Op will be terminated."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"Out"
,
AddOutput
(
kOutputs
,
"A set of variables, which will be assigned with values "
"A set of variables, which will be assigned with values "
"generated by perators inside the block of While Op."
)
"generated by
the o
perators inside the block of While Op."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
kStepScopes
,
AddOutput
(
kStepScopes
,
"(StepScopeVar) A vector of local scope, which size equals the "
"(StepScopeVar) A vector of local scope, which size equals the "
...
@@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase {
auto
*
step_scopes
=
auto
*
step_scopes
=
scope
.
FindVar
(
Input
(
kStepScopes
))
->
GetMutable
<
StepScopeVar
>
();
scope
.
FindVar
(
Input
(
kStepScopes
))
->
GetMutable
<
StepScopeVar
>
();
auto
outside_og_names
=
Inputs
(
framework
::
GradVarName
(
kOutputs
));
auto
inside_og_names
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"original_output_grad"
);
PADDLE_ENFORCE_EQ
(
outside_og_names
.
size
(),
inside_og_names
.
size
());
for
(
auto
cur_scope_iter
=
step_scopes
->
rbegin
();
for
(
auto
cur_scope_iter
=
step_scopes
->
rbegin
();
cur_scope_iter
!=
step_scopes
->
rend
();
++
cur_scope_iter
)
{
cur_scope_iter
!=
step_scopes
->
rend
();
++
cur_scope_iter
)
{
VLOG
(
3
)
<<
"Start backward at time_step "
<<
cur_scope_iter
-
step_scopes
->
rbegin
();
framework
::
Scope
&
cur_scope
=
**
cur_scope_iter
;
// Link OG from outside to inside
for
(
size_t
i
=
0
;
i
<
outside_og_names
.
size
();
++
i
)
{
auto
outside_og_name
=
outside_og_names
[
i
];
auto
inside_og_name
=
inside_og_names
[
i
];
VLOG
(
10
)
<<
"Linking outside "
<<
outside_og_name
<<
" --> inside "
<<
inside_og_name
;
auto
&
og_outside
=
detail
::
Ref
(
scope
.
FindVar
(
outside_og_name
));
auto
&
og_inside
=
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
));
if
(
og_outside
.
Type
().
hash_code
()
==
typeid
(
framework
::
LoDTensor
).
hash_code
())
{
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
ShareDataWith
(
outside_tensor
);
}
else
if
(
og_outside
.
Type
().
hash_code
()
==
typeid
(
framework
::
LoDTensorArray
).
hash_code
())
{
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
VLOG
(
10
)
<<
outside_og_name
<<
" size = "
<<
outside_array
.
size
();
inside_array
.
resize
(
outside_array
.
size
());
for
(
size_t
j
=
0
;
j
<
inside_array
.
size
();
++
j
)
{
VLOG
(
10
)
<<
j
<<
" "
<<
outside_array
[
j
].
numel
();
if
(
outside_array
[
j
].
numel
()
!=
0
)
{
inside_array
[
j
].
set_lod
(
outside_array
[
j
].
lod
());
inside_array
[
j
].
ShareDataWith
(
outside_array
[
j
]);
}
else
{
PADDLE_ENFORCE_EQ
(
inside_array
[
j
].
numel
(),
0
);
}
}
}
}
executor
.
Run
(
*
program
,
*
cur_scope_iter
,
block
->
ID
(),
false
);
executor
.
Run
(
*
program
,
*
cur_scope_iter
,
block
->
ID
(),
false
);
auto
&
pg_names
=
Outputs
(
kParamGrads
);
auto
&
pg_names
=
Outputs
(
kParamGrads
);
auto
&
p_names
=
Inputs
(
kParameters
);
auto
&
p_names
=
Inputs
(
kParameters
);
PADDLE_ENFORCE_EQ
(
pg_names
.
size
(),
p_names
.
size
());
PADDLE_ENFORCE_EQ
(
pg_names
.
size
(),
p_names
.
size
());
for
(
size_t
prog_id
=
0
;
prog_id
<
pg_names
.
size
();
++
prog_id
)
{
for
(
size_t
param_id
=
0
;
param_id
<
pg_names
.
size
();
++
param_id
)
{
auto
inside_grad_name
=
framework
::
GradVarName
(
p_names
[
prog_id
]);
if
(
pg_names
[
param_id
]
==
framework
::
kEmptyVarName
)
{
continue
;
// iterator doesn't have gradient
}
auto
inside_grad_name
=
framework
::
GradVarName
(
p_names
[
param_id
]);
// // TODO(tonyyang-s
avil
: Not sure we need the following
// // TODO(tonyyang-s
vail)
: Not sure we need the following
// // If does not compute gradient of that variable inside rnn,
// // If does not compute gradient of that variable inside rnn,
// just
// just
// // continue
// // continue
...
@@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase {
// zero gradient variable in step 0
// zero gradient variable in step 0
if
(
cur_scope_iter
==
step_scopes
->
rbegin
())
{
if
(
cur_scope_iter
==
step_scopes
->
rbegin
())
{
auto
*
var
=
(
*
cur_scope_iter
)
->
FindVar
(
inside_grad_name
);
auto
*
var
=
(
*
cur_scope_iter
)
->
FindVar
(
inside_grad_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"Can not find var %s"
,
inside_grad_name
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
framework
::
AttributeMap
attrs
;
framework
::
AttributeMap
attrs
;
...
@@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase {
attrs
[
"value"
]
=
0.0
f
;
attrs
[
"value"
]
=
0.0
f
;
auto
zero_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
zero_op
=
framework
::
OpRegistry
::
CreateOp
(
"fill_constant"
,
{},
{{
"Out"
,
{
pg_names
[
p
rog
_id
]}}},
attrs
);
"fill_constant"
,
{},
{{
"Out"
,
{
pg_names
[
p
aram
_id
]}}},
attrs
);
zero_op
->
Run
(
scope
,
dev_ctx
);
zero_op
->
Run
(
scope
,
dev_ctx
);
}
}
}
}
// sum gradient
// sum gradient
auto
*
outside_var
=
scope
.
FindVar
(
pg_names
[
prog_id
]);
auto
new_inside_name
=
cur_scope
.
Rename
(
inside_grad_name
);
PADDLE_ENFORCE_NOT_NULL
(
outside_var
);
auto
&
outside_tensor
=
*
outside_var
->
GetMutable
<
framework
::
LoDTensor
>
();
std
::
string
result_var_name
;
auto
*
local_result_var
=
(
*
cur_scope_iter
)
->
Var
(
&
result_var_name
);
auto
&
local_result_tensor
=
*
local_result_var
->
GetMutable
<
framework
::
LoDTensor
>
();
local_result_tensor
.
ShareDataWith
(
outside_tensor
);
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
result_var_name
,
inside_grad_name
}}},
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
{{
"Out"
,
{
result_var_name
}}},
{});
{{
"Out"
,
{
pg_names
[
param_id
]}}},
{});
sum_op
->
Run
(
**
cur_scope_iter
,
dev_ctx
);
sum_op
->
Run
(
cur_scope
,
dev_ctx
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
}
}
}
}
}
}
...
@@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
...
@@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
virtual
std
::
unique_ptr
<
framework
::
OpDescBind
>
Apply
()
const
{
virtual
std
::
unique_ptr
<
framework
::
OpDescBind
>
Apply
()
const
{
auto
*
grad
=
new
framework
::
OpDescBind
();
auto
*
grad
=
new
framework
::
OpDescBind
();
grad
->
SetType
(
"while_grad"
);
grad
->
SetType
(
"while_grad"
);
for
(
auto
&
input_param
:
this
->
InputNames
())
{
grad
->
SetInput
(
kParameters
,
Input
(
kParameters
));
grad
->
SetInput
(
input_param
,
this
->
Input
(
input_param
));
grad
->
SetOutput
(
grad
->
SetOutput
(
framework
::
GradVarName
(
input_param
),
framework
::
GradVarName
(
kParameters
),
this
->
InputGrad
(
input_param
));
InputGrad
(
kParameters
,
/*do not drop empty gradient*/
false
));
grad
->
SetInput
(
kOutputs
,
Output
(
kOutputs
));
// OG should be re-calculated by step blocks, since many outputs of while op
// do not need to calculate gradients.
std
::
unordered_set
<
std
::
string
>
block_ins
;
{
for
(
auto
&
p
:
Input
(
kParameters
))
{
block_ins
.
insert
(
p
);
}
for
(
auto
&
o
:
Output
(
kOutputs
))
{
block_ins
.
insert
(
o
);
}
}
}
std
::
unordered_set
<
std
::
string
>
extra_inputs
;
for
(
size_t
i
=
0
;
i
<
grad_block_
[
0
]
->
OpSize
();
++
i
)
{
for
(
auto
&
input_name
:
grad_block_
[
0
]
->
Op
(
i
)
->
InputArgumentNames
())
{
if
(
block_ins
.
find
(
input_name
)
!=
block_ins
.
end
())
{
continue
;
}
extra_inputs
.
insert
(
input_name
);
}
for
(
auto
&
output_param
:
this
->
OutputNames
())
{
for
(
auto
&
output_name
:
grad_block_
[
0
]
->
Op
(
i
)
->
OutputArgumentNames
())
{
grad
->
SetInput
(
output_param
,
this
->
Output
(
output_param
));
block_ins
.
insert
(
output_name
);
if
(
output_param
!=
kStepScopes
)
{
grad
->
SetInput
(
framework
::
GradVarName
(
output_param
),
this
->
OutputGrad
(
output_param
));
}
}
}
}
std
::
vector
<
std
::
string
>
extra_inputs_list
;
extra_inputs_list
.
resize
(
extra_inputs
.
size
());
std
::
copy
(
extra_inputs
.
begin
(),
extra_inputs
.
end
(),
extra_inputs_list
.
begin
());
grad
->
SetInput
(
framework
::
GradVarName
(
kOutputs
),
extra_inputs_list
);
grad
->
SetInput
(
kStepScopes
,
Output
(
kStepScopes
));
grad
->
SetAttrMap
(
this
->
Attrs
());
grad
->
SetAttrMap
(
this
->
Attrs
());
grad
->
SetBlockAttr
(
kStepBlock
,
*
grad_block_
[
0
]);
grad
->
SetBlockAttr
(
kStepBlock
,
*
grad_block_
[
0
]);
// record the original output gradient names, since the gradient name of
// while operator could be renamed.
grad
->
SetAttr
(
"original_output_grad"
,
extra_inputs_list
);
return
std
::
unique_ptr
<
framework
::
OpDescBind
>
(
grad
);
return
std
::
unique_ptr
<
framework
::
OpDescBind
>
(
grad
);
}
}
};
};
class
WhileGradOpVarTypeInference
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDescBind
&
op_desc
,
framework
::
BlockDescBind
*
block
)
const
override
{
auto
p_names
=
op_desc
.
Input
(
kParameters
);
auto
pg_names
=
op_desc
.
Output
(
framework
::
GradVarName
(
kParameters
));
for
(
size_t
i
=
0
;
i
<
p_names
.
size
();
++
i
)
{
auto
&
p_var
=
detail
::
Ref
(
block
->
FindVarRecursive
(
p_names
[
i
]));
auto
*
g_var
=
block
->
FindVarRecursive
(
pg_names
[
i
]);
if
(
g_var
!=
nullptr
)
{
// Gradient could be @EMPTY@
VLOG
(
5
)
<<
"Setting "
<<
pg_names
[
i
]
<<
" following "
<<
p_names
[
i
]
<<
" type: "
<<
p_var
.
GetType
();
g_var
->
SetType
(
p_var
.
GetType
());
g_var
->
SetDataType
(
p_var
.
GetDataType
());
}
}
}
};
class
WhileGradOpShapeInference
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
HasInputs
(
kParameters
);
ctx
->
HasOutputs
(
framework
::
GradVarName
(
kParameters
));
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
auto
p_names
=
ctx
->
Inputs
(
kParameters
);
auto
pg_names
=
ctx
->
Outputs
(
kParamGrads
);
auto
dims
=
ctx
->
GetInputsDim
(
kParameters
);
auto
var_types
=
ctx
->
GetInputsVarType
(
kParameters
);
std
::
vector
<
std
::
string
>
names_to_set
;
std
::
vector
<
framework
::
DDim
>
dims_to_set
;
for
(
size_t
i
=
0
;
i
<
p_names
.
size
();
++
i
)
{
if
(
pg_names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
if
(
var_types
[
i
]
==
framework
::
VarDesc
::
LOD_TENSOR
)
{
names_to_set
.
push_back
(
pg_names
[
i
]);
dims_to_set
.
push_back
(
dims
[
i
]);
}
else
if
(
var_types
[
i
]
==
framework
::
VarDesc
::
LOD_TENSOR_ARRAY
)
{
// not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set
.
push_back
(
pg_names
[
i
]);
dims_to_set
.
push_back
(
dims
[
i
]);
}
}
ctx
->
SetDims
(
names_to_set
,
dims_to_set
);
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OPERATOR
(
while
,
paddle
::
operators
::
WhileOp
,
REGISTER_OPERATOR
(
while
,
paddle
::
operators
::
WhileOp
,
paddle
::
operators
::
WhileOpMaker
,
paddle
::
operators
::
WhileOpMaker
,
paddle
::
operators
::
WhileGradOpDescMaker
);
paddle
::
operators
::
WhileGradOpDescMaker
);
REGISTER_OPERATOR
(
while_grad
,
paddle
::
operators
::
WhileGradOp
,
paddle
::
operators
::
WhileGradOpShapeInference
,
paddle
::
operators
::
WhileGradOpVarTypeInference
);
paddle/scripts/docker/README.md
浏览文件 @
94e86897
...
@@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
...
@@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
|
`WITH_GPU`
| OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
|
`WITH_GPU`
| OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
|
`WITH_AVX`
| OFF | Set to "ON" to enable AVX support. |
|
`WITH_AVX`
| OFF | Set to "ON" to enable AVX support. |
|
`WITH_TESTING`
| ON | Build unit tests binaries. |
|
`WITH_TESTING`
| ON | Build unit tests binaries. |
|
`WITH_MKLDNN`
| ON | Build with
[
Intel® MKL DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_MKL`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
and
[
Intel® MKL-DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_MKLML`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
support. |
|
`WITH_GOLANG`
| ON | Build fault-tolerant parameter server written in go. |
|
`WITH_GOLANG`
| ON | Build fault-tolerant parameter server written in go. |
|
`WITH_SWIG_PY`
| ON | Build with SWIG python API support. |
|
`WITH_SWIG_PY`
| ON | Build with SWIG python API support. |
|
`WITH_C_API`
| OFF | Build capi libraries for inference. |
|
`WITH_C_API`
| OFF | Build capi libraries for inference. |
...
...
paddle/scripts/docker/build.sh
浏览文件 @
94e86897
...
@@ -34,9 +34,7 @@ function cmake_gen() {
...
@@ -34,9 +34,7 @@ function cmake_gen() {
${
PYTHON_FLAGS
}
${
PYTHON_FLAGS
}
-DWITH_DOC=OFF
-DWITH_DOC=OFF
-DWITH_GPU=
${
WITH_GPU
:-
OFF
}
-DWITH_GPU=
${
WITH_GPU
:-
OFF
}
-DCUDA_ARCH_NAME=All
-DWITH_MKL=
${
WITH_MKL
:-
ON
}
-DWITH_MKLDNN=
${
WITH_MKLDNN
:-
ON
}
-DWITH_MKLML=
${
WITH_MKLML
:-
ON
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
ON
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
ON
}
-DWITH_SWIG_PY=ON
-DWITH_SWIG_PY=ON
...
@@ -57,9 +55,7 @@ EOF
...
@@ -57,9 +55,7 @@ EOF
${
PYTHON_FLAGS
}
\
${
PYTHON_FLAGS
}
\
-DWITH_DOC
=
OFF
\
-DWITH_DOC
=
OFF
\
-DWITH_GPU
=
${
WITH_GPU
:-
OFF
}
\
-DWITH_GPU
=
${
WITH_GPU
:-
OFF
}
\
-DCUDA_ARCH_NAME
=
All
\
-DWITH_MKL
=
${
WITH_MKL
:-
ON
}
\
-DWITH_MKLDNN
=
${
WITH_MKLDNN
:-
ON
}
\
-DWITH_MKLML
=
${
WITH_MKLML
:-
ON
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
ON
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
ON
}
\
-DWITH_SWIG_PY
=
${
WITH_SWIG_PY
:-
ON
}
\
-DWITH_SWIG_PY
=
${
WITH_SWIG_PY
:-
ON
}
\
...
...
paddle/scripts/submit_local.sh.in
浏览文件 @
94e86897
...
@@ -18,8 +18,8 @@ function version(){
...
@@ -18,8 +18,8 @@ function version(){
echo
"PaddlePaddle @PADDLE_VERSION@, compiled with"
echo
"PaddlePaddle @PADDLE_VERSION@, compiled with"
echo
" with_avx: @WITH_AVX@"
echo
" with_avx: @WITH_AVX@"
echo
" with_gpu: @WITH_GPU@"
echo
" with_gpu: @WITH_GPU@"
echo
" with_mkl: @WITH_MKL@"
echo
" with_mkldnn: @WITH_MKLDNN@"
echo
" with_mkldnn: @WITH_MKLDNN@"
echo
" with_mklml: @WITH_MKLML@"
echo
" with_double: @WITH_DOUBLE@"
echo
" with_double: @WITH_DOUBLE@"
echo
" with_python: @WITH_PYTHON@"
echo
" with_python: @WITH_PYTHON@"
echo
" with_rdma: @WITH_RDMA@"
echo
" with_rdma: @WITH_RDMA@"
...
@@ -45,8 +45,8 @@ function ver2num() {
...
@@ -45,8 +45,8 @@ function ver2num() {
function
cpu_config
()
{
function
cpu_config
()
{
# auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
# auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
# only when MKL
DNN or MKLML
enabled
# only when MKL enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
return
0
fi
fi
ht
=
`
lscpu |grep
"per core"
|awk
-F
':'
'{print $2}'
|xargs
`
ht
=
`
lscpu |grep
"per core"
|awk
-F
':'
'{print $2}'
|xargs
`
...
@@ -70,8 +70,8 @@ function cpu_config() {
...
@@ -70,8 +70,8 @@ function cpu_config() {
function
threads_config
()
{
function
threads_config
()
{
# auto set OMP_NUM_THREADS and MKL_NUM_THREADS
# auto set OMP_NUM_THREADS and MKL_NUM_THREADS
# according to trainer_count and total processors
# according to trainer_count and total processors
# only when MKL
DNN or MKLML
enabled
# only when MKL enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
return
0
fi
fi
processors
=
`
grep
"processor"
/proc/cpuinfo|sort
-u
|wc
-l
`
processors
=
`
grep
"processor"
/proc/cpuinfo|sort
-u
|wc
-l
`
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
94e86897
...
@@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build
...
@@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
DNN
=
OFF
-DWITH_MKLML
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
...
...
paddle/trainer/Trainer.cpp
浏览文件 @
94e86897
...
@@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
...
@@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
}
}
}
}
if
(
FLAGS_use_mkldnn
)
{
CHECK_EQ
(
FLAGS_trainer_count
,
1UL
)
<<
"MKLDNN only need 1 trainer"
;
}
if
(
testing
)
{
if
(
testing
)
{
LOG
(
INFO
)
<<
"trainer: in testing mode"
;
LOG
(
INFO
)
<<
"trainer: in testing mode"
;
if
(
config_
->
getOptConfig
().
use_sparse_remote_updater
()
||
if
(
config_
->
getOptConfig
().
use_sparse_remote_updater
()
||
...
...
python/paddle/v2/fluid/framework.py
浏览文件 @
94e86897
...
@@ -12,9 +12,9 @@ def unique_name(prefix):
...
@@ -12,9 +12,9 @@ def unique_name(prefix):
return
"_"
.
join
([
prefix
,
str
(
uid
)])
return
"_"
.
join
([
prefix
,
str
(
uid
)])
def
_debug_string_
(
proto
):
def
_debug_string_
(
proto
,
throw_on_error
=
True
):
error_fields
=
list
()
error_fields
=
list
()
if
not
proto
.
IsInitialized
(
error_fields
):
if
not
proto
.
IsInitialized
(
error_fields
)
and
throw_on_error
:
raise
ValueError
(
"{0} are not initialized
\n
The message is {1}"
.
format
(
raise
ValueError
(
"{0} are not initialized
\n
The message is {1}"
.
format
(
error_fields
,
proto
))
error_fields
,
proto
))
return
proto
.
__str__
()
return
proto
.
__str__
()
...
@@ -101,9 +101,12 @@ class Variable(object):
...
@@ -101,9 +101,12 @@ class Variable(object):
self
.
stop_gradient
=
stop_gradient
self
.
stop_gradient
=
stop_gradient
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
VarDesc
.
FromString
(
str
(
protostr
))
proto
=
framework_pb2
.
VarDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
__repr__
=
__str__
__repr__
=
__str__
...
@@ -291,10 +294,13 @@ class Operator(object):
...
@@ -291,10 +294,13 @@ class Operator(object):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
self
.
desc
.
infer_shape
(
self
.
block
.
desc
)
def
__str__
(
self
):
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
OpDesc
.
FromString
(
str
(
protostr
))
proto
=
framework_pb2
.
OpDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
def
__str__
(
self
):
return
self
.
to_string
(
True
)
__repr__
=
__str__
__repr__
=
__str__
...
@@ -349,9 +355,12 @@ class Block(object):
...
@@ -349,9 +355,12 @@ class Block(object):
self
.
program
=
program
self
.
program
=
program
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
BlockDesc
.
FromString
(
str
(
protostr
))
proto
=
framework_pb2
.
BlockDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
__repr__
=
__str__
__repr__
=
__str__
...
@@ -454,9 +463,12 @@ class Program(object):
...
@@ -454,9 +463,12 @@ class Program(object):
self
.
current_block_idx
=
0
self
.
current_block_idx
=
0
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
to_string
(
True
)
def
to_string
(
self
,
throw_on_error
):
protostr
=
self
.
desc
.
serialize_to_string
()
protostr
=
self
.
desc
.
serialize_to_string
()
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
str
(
protostr
))
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
str
(
protostr
))
return
_debug_string_
(
proto
)
return
_debug_string_
(
proto
,
throw_on_error
)
def
clone
(
self
):
def
clone
(
self
):
p
=
Program
()
p
=
Program
()
...
@@ -512,7 +524,14 @@ class Program(object):
...
@@ -512,7 +524,14 @@ class Program(object):
assert
isinstance
(
target
,
Variable
)
assert
isinstance
(
target
,
Variable
)
if
no_grad_set
is
None
:
if
no_grad_set
is
None
:
no_grad_set
=
set
()
no_grad_set
=
set
()
param_to_grad_info
=
self
.
desc
.
append_backward
(
target
.
desc
,
no_grad_set
)
try
:
param_to_grad_info
=
self
.
desc
.
append_backward
(
target
.
desc
,
no_grad_set
)
except
Exception
as
e
:
raise
core
.
EnforceNotMet
(
str
(
e
)
+
"
\n
Current protobuf is
\n
{0}"
.
format
(
self
.
to_string
(
False
)))
self
.
sync_with_cpp
()
self
.
sync_with_cpp
()
return
param_to_grad_info
return
param_to_grad_info
...
...
python/paddle/v2/fluid/net_drawer.py
浏览文件 @
94e86897
...
@@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs):
...
@@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs):
if
not
var_dict
.
has_key
(
var
):
if
not
var_dict
.
has_key
(
var
):
var_dict
[
var
]
=
"Feed"
var_dict
[
var
]
=
"Feed"
temp_id
=
0
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
proto
=
framework_pb2
.
ProgramDesc
.
FromString
(
program
.
desc
.
serialize_to_string
())
program
.
desc
.
serialize_to_string
())
for
block
in
proto
.
blocks
:
for
block
in
proto
.
blocks
:
for
op
in
block
.
ops
:
for
op
in
block
.
ops
:
op
.
type
=
op
.
type
+
"_"
+
str
(
temp_id
)
temp_id
+=
1
graph
.
node
(
**
draw_node
(
op
))
graph
.
node
(
**
draw_node
(
op
))
for
o
in
op
.
outputs
:
for
o
in
op
.
outputs
:
for
arg
in
o
.
arguments
:
for
arg
in
o
.
arguments
:
...
@@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
...
@@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
for
arg
in
e
.
arguments
:
for
arg
in
e
.
arguments
:
if
var_dict
.
has_key
(
arg
):
if
var_dict
.
has_key
(
arg
):
graph
.
edge
(
**
draw_edge
(
var_dict
,
op
,
e
,
arg
))
graph
.
edge
(
**
draw_edge
(
var_dict
,
op
,
e
,
arg
))
break
# only plot the first block
def
draw_graph
(
startup_program
,
main_program
,
**
kwargs
):
def
draw_graph
(
startup_program
,
main_program
,
**
kwargs
):
...
...
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistable
s
import
paddle.v2.fluid.layers
as
layer
s
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistables
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
import
numpy
as
np
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
y_predict
=
layers
.
fc
(
input
=
x
,
y_predict
=
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
size
=
1
,
act
=
None
)
y
=
layers
.
data
(
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
cost
=
layers
.
square_error_cost
(
cost
=
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
input
=
y_predict
,
label
=
y
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
20
BATCH_SIZE
=
20
...
...
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
浏览文件 @
94e86897
import
numpy
as
np
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.optimizer
as
optimizer
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
resnet_cifar10
(
input
,
depth
=
32
):
def
resnet_cifar10
(
input
,
depth
=
32
):
def
conv_bn_layer
(
input
,
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
tmp
=
layers
.
conv2d
(
tmp
=
layers
.
conv2d
(
input
=
input
,
input
=
input
,
filter_size
=
filter_size
,
filter_size
=
filter_size
,
...
@@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32):
...
@@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32):
padding
=
padding
,
padding
=
padding
,
act
=
None
,
act
=
None
,
bias_attr
=
False
)
bias_attr
=
False
)
return
layers
.
batch_norm
(
return
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
input
=
tmp
,
act
=
act
)
def
shortcut
(
input
,
ch_in
,
ch_out
,
stride
,
program
,
init_program
):
def
shortcut
(
input
,
ch_in
,
ch_out
,
stride
,
program
,
init_program
):
if
ch_in
!=
ch_out
:
if
ch_in
!=
ch_out
:
...
@@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32):
...
@@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32):
else
:
else
:
return
input
return
input
def
basicblock
(
input
,
def
basicblock
(
input
,
ch_in
,
ch_out
,
stride
):
ch_in
,
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
ch_out
,
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
stride
):
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
short
=
shortcut
(
input
,
ch_in
,
ch_out
,
stride
)
short
=
shortcut
(
input
,
ch_in
,
ch_out
,
stride
)
return
layers
.
elementwise_add
(
return
layers
.
elementwise_add
(
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
def
layer_warp
(
block_func
,
input
,
ch_in
,
ch_out
,
count
,
stride
):
def
layer_warp
(
block_func
,
input
,
ch_in
,
ch_out
,
count
,
stride
):
tmp
=
block_func
(
input
,
ch_in
,
ch_out
,
stride
)
tmp
=
block_func
(
input
,
ch_in
,
ch_out
,
stride
)
...
@@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32):
...
@@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32):
assert
(
depth
-
2
)
%
6
==
0
assert
(
depth
-
2
)
%
6
==
0
n
=
(
depth
-
2
)
/
6
n
=
(
depth
-
2
)
/
6
conv1
=
conv_bn_layer
(
conv1
=
conv_bn_layer
(
input
=
input
,
input
=
input
,
ch_out
=
16
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
)
ch_out
=
16
,
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
filter_size
=
3
,
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
stride
=
1
,
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
padding
=
1
)
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
pool
=
layers
.
pool2d
(
pool
=
layers
.
pool2d
(
input
=
res3
,
input
=
res3
,
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
return
pool
return
pool
def
vgg16_bn_drop
(
input
):
def
vgg16_bn_drop
(
input
):
def
conv_block
(
input
,
def
conv_block
(
input
,
num_filter
,
groups
,
dropouts
):
num_filter
,
groups
,
dropouts
):
return
nets
.
img_conv_group
(
return
nets
.
img_conv_group
(
input
=
input
,
input
=
input
,
pool_size
=
2
,
pool_size
=
2
,
...
@@ -123,22 +71,14 @@ def vgg16_bn_drop(input):
...
@@ -123,22 +71,14 @@ def vgg16_bn_drop(input):
conv4
=
conv_block
(
conv3
,
512
,
3
,
[
0.4
,
0.4
,
0
])
conv4
=
conv_block
(
conv3
,
512
,
3
,
[
0.4
,
0.4
,
0
])
conv5
=
conv_block
(
conv4
,
512
,
3
,
[
0.4
,
0.4
,
0
])
conv5
=
conv_block
(
conv4
,
512
,
3
,
[
0.4
,
0.4
,
0
])
drop
=
layers
.
dropout
(
drop
=
layers
.
dropout
(
x
=
conv5
,
dropout_prob
=
0.5
)
x
=
conv5
,
dropout_prob
=
0.5
)
fc1
=
layers
.
fc
(
input
=
drop
,
fc1
=
layers
.
fc
(
input
=
drop
,
size
=
512
,
size
=
512
,
act
=
None
,
act
=
None
,
param_attr
=
{
"initializer"
:
XavierInitializer
()})
param_attr
=
{
"initializer"
:
XavierInitializer
()})
reshape1
=
layers
.
reshape
(
reshape1
=
layers
.
reshape
(
x
=
fc1
,
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
x
=
fc1
,
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
fc2
=
layers
.
fc
(
input
=
drop2
,
fc2
=
layers
.
fc
(
input
=
drop2
,
size
=
512
,
size
=
512
,
act
=
None
,
act
=
None
,
...
@@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label)
...
@@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
# optimizer =
optimizer.
SGDOptimizer(learning_rate=0.001)
# optimizer = SGDOptimizer(learning_rate=0.001)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.001
)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.001
)
opts
=
optimizer
.
minimize
(
avg_cost
)
opts
=
optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
128
BATCH_SIZE
=
128
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
conv_pool_1
=
nets
.
simple_img_conv_pool
(
conv_pool_1
=
nets
.
simple_img_conv_pool
(
input
=
images
,
input
=
images
,
filter_size
=
5
,
filter_size
=
5
,
...
@@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool(
...
@@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool(
pool_stride
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
act
=
"relu"
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
)
size
=
10
,
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
opts
=
optimizer
.
minimize
(
avg_cost
)
opts
=
optimizer
.
minimize
(
avg_cost
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
input
=
predict
,
label
=
label
)
BATCH_SIZE
=
50
BATCH_SIZE
=
50
PASS_NUM
=
3
PASS_NUM
=
3
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
from
paddle.v2.fluid.initializer
import
UniformInitializer
from
paddle.v2.fluid.initializer
import
UniformInitializer
from
paddle.v2.fluid.optimizer
import
MomentumOptimizer
import
numpy
as
np
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
BATCH_SIZE
=
128
BATCH_SIZE
=
128
image
=
layers
.
data
(
image
=
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
param_attr
=
{
param_attr
=
{
'name'
:
None
,
'name'
:
None
,
...
@@ -22,32 +18,21 @@ param_attr = {
...
@@ -22,32 +18,21 @@ param_attr = {
'regularization'
:
L2DecayRegularizer
(
0.0005
*
BATCH_SIZE
)
'regularization'
:
L2DecayRegularizer
(
0.0005
*
BATCH_SIZE
)
}
}
hidden1
=
layers
.
fc
(
input
=
image
,
hidden1
=
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
,
param_attr
=
param_attr
)
size
=
128
,
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
act
=
'relu'
,
param_attr
=
param_attr
)
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
predict
=
layers
.
fc
(
input
=
hidden2
,
predict
=
layers
.
fc
(
input
=
hidden2
,
size
=
10
,
size
=
10
,
act
=
'softmax'
,
act
=
'softmax'
,
param_attr
=
param_attr
)
param_attr
=
param_attr
)
label
=
layers
.
data
(
label
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
input
=
predict
,
label
=
label
)
optimizer
=
optimizer
.
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
optimizer
=
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
opts
=
optimizer
.
minimize
(
avg_cost
)
opts
=
optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/book/test_recommender_system.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
import
numpy
as
np
IS_SPARSE
=
True
IS_SPARSE
=
True
USE_GPU
=
False
USE_GPU
=
False
...
@@ -19,10 +18,7 @@ def get_usr_combined_features():
...
@@ -19,10 +18,7 @@ def get_usr_combined_features():
USR_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_user_id
()
+
1
USR_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_user_id
()
+
1
uid
=
layers
.
data
(
uid
=
layers
.
data
(
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_emb
=
layers
.
embedding
(
usr_emb
=
layers
.
embedding
(
input
=
uid
,
input
=
uid
,
...
@@ -31,15 +27,11 @@ def get_usr_combined_features():
...
@@ -31,15 +27,11 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'user_table'
},
param_attr
=
{
'name'
:
'user_table'
},
is_sparse
=
IS_SPARSE
)
is_sparse
=
IS_SPARSE
)
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
size
=
32
)
size
=
32
)
USR_GENDER_DICT_SIZE
=
2
USR_GENDER_DICT_SIZE
=
2
usr_gender_id
=
layers
.
data
(
usr_gender_id
=
layers
.
data
(
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_gender_emb
=
layers
.
embedding
(
usr_gender_emb
=
layers
.
embedding
(
input
=
usr_gender_id
,
input
=
usr_gender_id
,
...
@@ -47,14 +39,10 @@ def get_usr_combined_features():
...
@@ -47,14 +39,10 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'gender_table'
},
param_attr
=
{
'name'
:
'gender_table'
},
is_sparse
=
IS_SPARSE
)
is_sparse
=
IS_SPARSE
)
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
size
=
16
)
size
=
16
)
USR_AGE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
age_table
)
USR_AGE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
age_table
)
usr_age_id
=
layers
.
data
(
usr_age_id
=
layers
.
data
(
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_age_emb
=
layers
.
embedding
(
usr_age_emb
=
layers
.
embedding
(
input
=
usr_age_id
,
input
=
usr_age_id
,
...
@@ -62,14 +50,10 @@ def get_usr_combined_features():
...
@@ -62,14 +50,10 @@ def get_usr_combined_features():
is_sparse
=
IS_SPARSE
,
is_sparse
=
IS_SPARSE
,
param_attr
=
{
'name'
:
'age_table'
})
param_attr
=
{
'name'
:
'age_table'
})
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
size
=
16
)
size
=
16
)
USR_JOB_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_job_id
()
+
1
USR_JOB_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_job_id
()
+
1
usr_job_id
=
layers
.
data
(
usr_job_id
=
layers
.
data
(
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_job_emb
=
layers
.
embedding
(
usr_job_emb
=
layers
.
embedding
(
input
=
usr_job_id
,
input
=
usr_job_id
,
...
@@ -77,16 +61,12 @@ def get_usr_combined_features():
...
@@ -77,16 +61,12 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'job_table'
},
param_attr
=
{
'name'
:
'job_table'
},
is_sparse
=
IS_SPARSE
)
is_sparse
=
IS_SPARSE
)
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
size
=
16
)
size
=
16
)
concat_embed
=
layers
.
concat
(
concat_embed
=
layers
.
concat
(
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
axis
=
1
)
axis
=
1
)
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
size
=
200
,
act
=
"tanh"
)
return
usr_combined_features
return
usr_combined_features
...
@@ -95,10 +75,7 @@ def get_mov_combined_features():
...
@@ -95,10 +75,7 @@ def get_mov_combined_features():
MOV_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_movie_id
()
+
1
MOV_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_movie_id
()
+
1
mov_id
=
layers
.
data
(
mov_id
=
layers
.
data
(
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_emb
=
layers
.
embedding
(
mov_emb
=
layers
.
embedding
(
input
=
mov_id
,
input
=
mov_id
,
...
@@ -107,36 +84,24 @@ def get_mov_combined_features():
...
@@ -107,36 +84,24 @@ def get_mov_combined_features():
param_attr
=
{
'name'
:
'movie_table'
},
param_attr
=
{
'name'
:
'movie_table'
},
is_sparse
=
IS_SPARSE
)
is_sparse
=
IS_SPARSE
)
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
size
=
32
)
size
=
32
)
CATEGORY_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
movie_categories
())
CATEGORY_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
movie_categories
())
category_id
=
layers
.
data
(
category_id
=
layers
.
data
(
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_categories_emb
=
layers
.
embedding
(
mov_categories_emb
=
layers
.
embedding
(
input
=
category_id
,
input
=
category_id
,
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_categories_hidden
=
layers
.
sequence_pool
(
mov_categories_hidden
=
layers
.
sequence_pool
(
input
=
mov_categories_emb
,
input
=
mov_categories_emb
,
pool_type
=
"sum"
)
pool_type
=
"sum"
)
MOV_TITLE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
get_movie_title_dict
())
MOV_TITLE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
get_movie_title_dict
())
mov_title_id
=
layers
.
data
(
mov_title_id
=
layers
.
data
(
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_title_emb
=
layers
.
embedding
(
mov_title_emb
=
layers
.
embedding
(
input
=
mov_title_id
,
input
=
mov_title_id
,
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_title_conv
=
nets
.
sequence_conv_pool
(
mov_title_conv
=
nets
.
sequence_conv_pool
(
input
=
mov_title_emb
,
input
=
mov_title_emb
,
...
@@ -146,13 +111,10 @@ def get_mov_combined_features():
...
@@ -146,13 +111,10 @@ def get_mov_combined_features():
pool_type
=
"sum"
)
pool_type
=
"sum"
)
concat_embed
=
layers
.
concat
(
concat_embed
=
layers
.
concat
(
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
axis
=
1
)
axis
=
1
)
# FIXME(dzh) : need tanh operator
# FIXME(dzh) : need tanh operator
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
size
=
200
,
act
=
"tanh"
)
return
mov_combined_features
return
mov_combined_features
...
@@ -162,18 +124,11 @@ def model():
...
@@ -162,18 +124,11 @@ def model():
mov_combined_features
=
get_mov_combined_features
()
mov_combined_features
=
get_mov_combined_features
()
# need cos sim
# need cos sim
inference
=
layers
.
cos_sim
(
inference
=
layers
.
cos_sim
(
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
label
=
layers
.
data
(
label
=
layers
.
data
(
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
square_cost
=
layers
.
square_error_cost
(
square_cost
=
layers
.
square_error_cost
(
input
=
inference
,
label
=
label
)
input
=
inference
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
square_cost
)
avg_cost
=
layers
.
mean
(
x
=
square_cost
)
...
@@ -182,7 +137,7 @@ def model():
...
@@ -182,7 +137,7 @@ def model():
def
main
():
def
main
():
cost
=
model
()
cost
=
model
()
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.2
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.2
)
opts
=
sgd_optimizer
.
minimize
(
cost
)
opts
=
sgd_optimizer
.
minimize
(
cost
)
if
USE_GPU
:
if
USE_GPU
:
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
def
convolution_net
(
input_dim
,
class_dim
=
2
,
emb_dim
=
32
,
hid_dim
=
32
):
def
convolution_net
(
input_dim
,
class_dim
=
2
,
emb_dim
=
32
,
hid_dim
=
32
):
...
@@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
...
@@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
act
=
"softmax"
)
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
def
stacked_lstm_net
(
input_dim
,
def
stacked_lstm_net
(
input_dim
,
...
@@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim,
...
@@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim,
act
=
'softmax'
)
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
def
lstm_net
(
dict_dim
,
class_dim
=
2
,
emb_dim
=
32
,
seq_len
=
80
,
batch_size
=
50
):
def
lstm_net
(
dict_dim
,
class_dim
=
2
,
emb_dim
=
32
,
seq_len
=
80
,
batch_size
=
50
):
...
@@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
...
@@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
python/paddle/v2/fluid/tests/book/test_word2vec.py
浏览文件 @
94e86897
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
import
numpy
as
np
PASS_NUM
=
100
PASS_NUM
=
100
EMBED_SIZE
=
32
EMBED_SIZE
=
32
...
@@ -17,26 +16,11 @@ IS_SPARSE = True
...
@@ -17,26 +16,11 @@ IS_SPARSE = True
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
dict_size
=
len
(
word_dict
)
dict_size
=
len
(
word_dict
)
first_word
=
layers
.
data
(
first_word
=
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'firstw'
,
second_word
=
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
shape
=
[
1
],
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
second_word
=
layers
.
data
(
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
embed_first
=
layers
.
embedding
(
embed_first
=
layers
.
embedding
(
input
=
first_word
,
input
=
first_word
,
...
@@ -64,19 +48,12 @@ embed_forth = layers.embedding(
...
@@ -64,19 +48,12 @@ embed_forth = layers.embedding(
param_attr
=
{
'name'
:
'shared_w'
})
param_attr
=
{
'name'
:
'shared_w'
})
concat_embed
=
layers
.
concat
(
concat_embed
=
layers
.
concat
(
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
axis
=
1
)
axis
=
1
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
size
=
HIDDEN_SIZE
,
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
act
=
'sigmoid'
)
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
94e86897
...
@@ -110,13 +110,30 @@ class TestConv2dOp(OpTest):
...
@@ -110,13 +110,30 @@ class TestConv2dOp(OpTest):
self
.
op_type
=
"conv2d"
self
.
op_type
=
"conv2d"
class
TestWithPad
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithStride
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
input_size
=
[
2
,
3
,
6
,
6
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithGroup
(
TestConv2dOp
):
class
TestWithGroup
(
TestConv2dOp
):
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWith1x1
(
TestConv2dOp
):
class
TestWith1x1
(
TestConv2dOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp):
...
@@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWithDilation
(
TestConv2dOp
):
class
TestWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
...
@@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp):
...
@@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp):
def
init_group
(
self
):
def
init_group
(
self
):
self
.
groups
=
3
self
.
groups
=
3
#----------------Conv2dCudnn----------------
class
TestCudnn
(
TestConv2dOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv
2d
"
self
.
op_type
=
"conv
_cudnn
"
#----------------Conv2dCudnn----------------
class
TestCudnnWithPad
(
TestWithPad
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnn
(
TestConv2dOp
):
class
TestCudnn
WithStride
(
TestWithStride
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
self
.
op_type
=
"conv_cudnn"
...
...
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
浏览文件 @
94e86897
...
@@ -4,9 +4,7 @@ from op_test import OpTest
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv2dtranspose_forward_naive
(
input_
,
filter_
,
conv2dtranspose_param
):
def
conv2dtranspose_forward_naive
(
input_
,
filter_
,
conv2dtranspose_param
):
# [2, 3, 5, 5]
in_n
,
in_c
,
in_h
,
in_w
=
input_
.
shape
in_n
,
in_c
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3]
f_c
,
out_c
,
f_h
,
f_w
=
filter_
.
shape
f_c
,
out_c
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
assert
in_c
==
f_c
...
@@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
...
@@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
j1
,
j2
=
j
*
stride
[
0
],
j
*
stride
[
0
]
+
f_w
j1
,
j2
=
j
*
stride
[
0
],
j
*
stride
[
0
]
+
f_w
out
[
n
,
k
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
[
n
,
k
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_h
-
pad
[
0
],
pad
[
1
]:
out_w
-
pad
[
1
]]
return
out
return
out
...
@@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
# init as conv transpose
# init as conv transpose
self
.
init_op_type
()
self
.
init_op_type
()
# [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7]
self
.
init_test_case
()
self
.
init_test_case
()
conv2dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv2dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
@@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
print
'check output here for'
,
self
.
op_type
self
.
check_output
()
self
.
check_output
()
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
...
@@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest):
...
@@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest):
self
.
op_type
=
"conv2d_transpose"
self
.
op_type
=
"conv2d_transpose"
class
TestWithPad
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
class
TestWithStride
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
# ------------ test_cudnn ------------
# ------------ test_cudnn ------------
class
TestCudnn
(
TestConv2dTransposeOp
):
class
TestCudnn
(
TestConv2dTransposeOp
):
def
init_op_type
(
self
):
def
init_op_type
(
self
):
...
...
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
浏览文件 @
94e86897
...
@@ -4,9 +4,7 @@ from op_test import OpTest
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
):
def
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
):
# [2, 3, 5, 5, 5]
in_n
,
in_c
,
in_d
,
in_h
,
in_w
=
input_
.
shape
in_n
,
in_c
,
in_d
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3, 3]
f_c
,
out_c
,
f_d
,
f_h
,
f_w
=
filter_
.
shape
f_c
,
out_c
,
f_d
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
assert
in_c
==
f_c
...
@@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
...
@@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
out_d
=
(
in_d
-
1
)
*
stride
[
0
]
+
f_d
out_d
=
(
in_d
-
1
)
*
stride
[
0
]
+
f_d
out_h
=
(
in_h
-
1
)
*
stride
[
1
]
+
f_h
out_h
=
(
in_h
-
1
)
*
stride
[
1
]
+
f_h
out_w
=
(
in_w
-
1
)
*
stride
[
2
]
+
f_w
out_w
=
(
in_w
-
1
)
*
stride
[
2
]
+
f_w
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
for
n
in
range
(
in_n
):
for
n
in
range
(
in_n
):
...
@@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
...
@@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
j1
,
j2
=
j
*
stride
[
2
],
j
*
stride
[
2
]
+
f_w
j1
,
j2
=
j
*
stride
[
2
],
j
*
stride
[
2
]
+
f_w
out
[
n
,
k
,
d1
:
d2
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
[
n
,
k
,
d1
:
d2
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_d
-
pad
[
0
],
pad
[
1
]:
out_h
-
pad
[
1
],
pad
[
2
]:
out_w
-
pad
[
2
]]
return
out
return
out
...
@@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
# init as conv transpose
# init as conv transpose
self
.
init_op_type
()
self
.
init_op_type
()
# [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
self
.
init_test_case
()
self
.
init_test_case
()
conv3dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
conv3dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
@@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest):
filter_
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
filter_
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv3dtranspose_forward_naive
(
output
=
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
).
astype
(
"float32"
)
input_
,
filter_
,
conv3dtranspose_param
).
astype
(
"float32"
)
# print 'deconv output py', output, output.shape
self
.
inputs
=
{
'Input'
:
input_
,
'Filter'
:
filter_
}
self
.
inputs
=
{
'Input'
:
input_
,
'Filter'
:
filter_
}
self
.
attrs
=
{
self
.
attrs
=
{
...
@@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
print
'check output here'
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
...
@@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest):
self
.
pad
=
[
0
,
0
,
0
]
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NC
D
HW
f_c
=
self
.
input_size
[
1
]
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
...
@@ -93,5 +88,25 @@ class TestConv3dTransposeOp(OpTest):
...
@@ -93,5 +88,25 @@ class TestConv3dTransposeOp(OpTest):
self
.
op_type
=
"conv3d_transpose"
self
.
op_type
=
"conv3d_transpose"
class
TestWithPad
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
class
TestWithStride
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
2
,
2
,
2
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_is_empty_op.py
0 → 100644
浏览文件 @
94e86897
import
unittest
import
numpy
as
np
from
paddle.v2.fluid.op
import
Operator
import
paddle.v2.fluid.core
as
core
def
create_tensor
(
scope
,
name
,
np_data
):
tensor
=
scope
.
var
(
name
).
get_tensor
()
tensor
.
set_dims
(
np_data
.
shape
)
tensor
.
set
(
np_data
,
core
.
CPUPlace
())
return
tensor
class
TestIsEmptyOp
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
scope
=
core
.
Scope
()
# create input variables
np_data0
=
np
.
array
([
0
,
1
,
2
])
create_tensor
(
self
.
scope
,
"X0"
,
np_data0
)
np_data1
=
np
.
array
([
1
])
t
=
create_tensor
(
self
.
scope
,
"X1"
,
np_data1
)
t
.
set_dims
([
0
])
# create output variables
self
.
scope
.
var
(
"out"
)
def
test_no_empty
(
self
):
self
.
one_case
(
"X0"
,
False
)
def
test_empty
(
self
):
self
.
one_case
(
"X1"
,
True
)
def
one_case
(
self
,
input
,
target
):
op
=
Operator
(
type
=
"is_empty"
,
X
=
input
,
Out
=
"out"
)
ctx
=
core
.
DeviceContext
.
create
(
core
.
CPUPlace
())
op
.
run
(
self
.
scope
,
ctx
)
out
=
self
.
scope
.
var
(
"out"
).
get_tensor
()
self
.
assertEqual
(
np
.
array
(
out
)[
0
],
target
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_while_op.py
浏览文件 @
94e86897
...
@@ -2,6 +2,7 @@ import unittest
...
@@ -2,6 +2,7 @@ import unittest
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.core
as
core
from
paddle.v2.fluid.backward
import
append_backward_ops
import
numpy
import
numpy
...
@@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase):
...
@@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase):
i
=
layers
.
zeros
(
shape
=
[
1
],
dtype
=
'int64'
)
i
=
layers
.
zeros
(
shape
=
[
1
],
dtype
=
'int64'
)
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
init
=
layers
.
zeros
(
shape
=
[
10
],
dtype
=
'float32'
)
init
=
layers
.
zeros
(
shape
=
[
10
],
dtype
=
'float32'
)
mem_array
=
layers
.
array_write
(
init
,
i
=
i
)
mem_array
=
layers
.
array_write
(
x
=
init
,
i
=
i
)
data_array
=
layers
.
array_write
(
x
=
d0
,
i
=
i
)
data_array
=
layers
.
array_write
(
x
=
d0
,
i
=
i
)
i
=
layers
.
increment
(
i
)
i
=
layers
.
increment
(
i
)
...
@@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase):
...
@@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase):
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
array_len
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
3
)
array_len
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
3
)
array_len
.
stop_gradient
=
True
cond
=
layers
.
less_than
(
x
=
i
,
y
=
array_len
)
cond
=
layers
.
less_than
(
x
=
i
,
y
=
array_len
)
while_op
=
layers
.
While
(
cond
=
cond
)
while_op
=
layers
.
While
(
cond
=
cond
)
with
while_op
.
block
():
with
while_op
.
block
():
d
=
layers
.
array_read
(
array
=
data_array
,
i
=
i
)
d
=
layers
.
array_read
(
array
=
data_array
,
i
=
i
)
prev
=
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
prev
=
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
i
=
layers
.
increment
(
x
=
i
,
in_place
=
True
)
result
=
layers
.
sums
(
input
=
[
d
,
prev
])
result
=
layers
.
sums
(
input
=
[
d
,
prev
])
i
=
layers
.
increment
(
x
=
i
,
in_place
=
True
)
layers
.
array_write
(
result
,
i
=
i
,
array
=
mem_array
)
layers
.
array_write
(
result
,
i
=
i
,
array
=
mem_array
)
layers
.
less_than
(
x
=
i
,
y
=
array_len
,
cond
=
cond
)
layers
.
less_than
(
x
=
i
,
y
=
array_len
,
cond
=
cond
)
sum_result
=
layers
.
array_read
(
mem_array
,
i
=
array_len
)
sum_result
=
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
loss
=
layers
.
mean
(
x
=
sum_result
)
append_backward_ops
(
loss
)
cpu
=
core
.
CPUPlace
()
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
exe
=
Executor
(
cpu
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录