Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
927adb4a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
927adb4a
编写于
8月 04, 2017
作者:
C
caoying03
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into kmax_score_layer
上级
855ae59d
78a772ea
变更
70
隐藏空白更改
内联
并排
Showing
70 changed file
with
1266 addition
and
666 deletion
+1266
-666
README.md
README.md
+1
-1
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+21
-26
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+11
-7
cmake/generic.cmake
cmake/generic.cmake
+15
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+0
-1
paddle/cuda/src/hl_cuda_cudnn.cc
paddle/cuda/src/hl_cuda_cudnn.cc
+9
-0
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+10
-4
paddle/framework/backward.cc
paddle/framework/backward.cc
+14
-11
paddle/framework/backward_test.cc
paddle/framework/backward_test.cc
+19
-18
paddle/framework/ddim.h
paddle/framework/ddim.h
+3
-6
paddle/framework/grad_op_builder.cc
paddle/framework/grad_op_builder.cc
+71
-81
paddle/framework/grad_op_builder.h
paddle/framework/grad_op_builder.h
+16
-39
paddle/framework/grad_op_builder_test.cc
paddle/framework/grad_op_builder_test.cc
+124
-5
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+12
-5
paddle/framework/operator.h
paddle/framework/operator.h
+5
-1
paddle/framework/pybind.cc
paddle/framework/pybind.cc
+68
-62
paddle/framework/tensor.h
paddle/framework/tensor.h
+6
-8
paddle/framework/tensor_impl.h
paddle/framework/tensor_impl.h
+0
-0
paddle/framework/tensor_py.h
paddle/framework/tensor_py.h
+5
-8
paddle/function/ConvOp.h
paddle/function/ConvOp.h
+7
-0
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+83
-48
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+19
-7
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+10
-2
paddle/gserver/layers/KmaxSeqScoreLayer.cpp
paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+11
-9
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+13
-1
paddle/gserver/tests/LayerGradUtil.h
paddle/gserver/tests/LayerGradUtil.h
+4
-1
paddle/gserver/tests/test_KmaxSeqScore.cpp
paddle/gserver/tests/test_KmaxSeqScore.cpp
+55
-41
paddle/math/BaseMatrix.cu
paddle/math/BaseMatrix.cu
+2
-1
paddle/memory/detail/buddy_allocator.h
paddle/memory/detail/buddy_allocator.h
+1
-1
paddle/memory/detail/meta_cache.h
paddle/memory/detail/meta_cache.h
+4
-4
paddle/memory/memory.h
paddle/memory/memory.h
+1
-1
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+6
-8
paddle/operators/add_op.cu
paddle/operators/add_op.cu
+14
-0
paddle/operators/add_op.h
paddle/operators/add_op.h
+7
-3
paddle/operators/cross_entropy_op.cu
paddle/operators/cross_entropy_op.cu
+15
-1
paddle/operators/fill_zeros_like_op.cu
paddle/operators/fill_zeros_like_op.cu
+15
-1
paddle/operators/mean_op.cc
paddle/operators/mean_op.cc
+11
-1
paddle/operators/mean_op.cu
paddle/operators/mean_op.cu
+15
-0
paddle/operators/mean_op.h
paddle/operators/mean_op.h
+22
-2
paddle/operators/mul_op.cu
paddle/operators/mul_op.cu
+1
-1
paddle/operators/mul_op.h
paddle/operators/mul_op.h
+9
-4
paddle/operators/net_op.cc
paddle/operators/net_op.cc
+3
-3
paddle/operators/net_op.h
paddle/operators/net_op.h
+11
-9
paddle/operators/net_op_design.md
paddle/operators/net_op_design.md
+0
-0
paddle/operators/net_op_test.cc
paddle/operators/net_op_test.cc
+10
-8
paddle/operators/recurrent_op.cc
paddle/operators/recurrent_op.cc
+97
-121
paddle/operators/recurrent_op.h
paddle/operators/recurrent_op.h
+17
-18
paddle/operators/recurrent_op_test.cc
paddle/operators/recurrent_op_test.cc
+25
-31
paddle/operators/rowwise_add_op.cu
paddle/operators/rowwise_add_op.cu
+14
-0
paddle/operators/sgd_op.cu
paddle/operators/sgd_op.cu
+15
-1
paddle/operators/sgd_op.h
paddle/operators/sgd_op.h
+6
-2
paddle/operators/sigmoid_op.cu
paddle/operators/sigmoid_op.cu
+14
-0
paddle/operators/sigmoid_op.h
paddle/operators/sigmoid_op.h
+5
-2
paddle/operators/softmax_op.cc
paddle/operators/softmax_op.cc
+31
-18
paddle/operators/softmax_op.cu
paddle/operators/softmax_op.cu
+16
-0
paddle/operators/softmax_op.h
paddle/operators/softmax_op.h
+47
-11
paddle/operators/type_alias.h
paddle/operators/type_alias.h
+5
-2
paddle/platform/device_context.h
paddle/platform/device_context.h
+4
-4
paddle/platform/dynload/cublas.cc
paddle/platform/dynload/cublas.cc
+14
-0
paddle/platform/dynload/cudnn.cc
paddle/platform/dynload/cudnn.cc
+15
-1
paddle/platform/dynload/curand.cc
paddle/platform/dynload/curand.cc
+18
-3
paddle/platform/place.h
paddle/platform/place.h
+1
-1
paddle/pybind/CMakeLists.txt
paddle/pybind/CMakeLists.txt
+1
-1
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-2
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+2
-2
paddle/string/piece.h
paddle/string/piece.h
+2
-2
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
.../paddle/trainer_config_helpers/tests/configs/file_list.sh
+1
-1
python/paddle/v2/framework/tests/CMakeLists.txt
python/paddle/v2/framework/tests/CMakeLists.txt
+2
-1
python/paddle/v2/framework/tests/gradient_checker.py
python/paddle/v2/framework/tests/gradient_checker.py
+90
-0
python/paddle/v2/framework/tests/test_softmax_op.py
python/paddle/v2/framework/tests/test_softmax_op.py
+63
-1
未找到文件。
README.md
浏览文件 @
927adb4a
...
@@ -72,7 +72,7 @@ We provide [English](http://doc.paddlepaddle.org/develop/doc/) and
...
@@ -72,7 +72,7 @@ We provide [English](http://doc.paddlepaddle.org/develop/doc/) and
-
[
Deep Learning 101
](
http://book.paddlepaddle.org/index.html
)
-
[
Deep Learning 101
](
http://book.paddlepaddle.org/index.html
)
You might want to start from th
e th
is online interactive book that can run in Jupyter Notebook.
You might want to start from this online interactive book that can run in Jupyter Notebook.
-
[
Distributed Training
](
http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html
)
-
[
Distributed Training
](
http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html
)
...
...
cmake/external/mkldnn.cmake
浏览文件 @
927adb4a
...
@@ -20,34 +20,30 @@ INCLUDE(ExternalProject)
...
@@ -20,34 +20,30 @@ INCLUDE(ExternalProject)
SET
(
MKLDNN_PROJECT
"extern_mkldnn"
)
SET
(
MKLDNN_PROJECT
"extern_mkldnn"
)
SET
(
MKLDNN_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/mkldnn
)
SET
(
MKLDNN_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/mkldnn
)
SET
(
MKLDNN_INSTALL_ROOT
${
CMAKE_INSTALL_PREFIX
}
)
SET
(
MKLDNN_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/mkldnn
)
IF
(
NOT
"$ENV{HOME}"
STREQUAL
"/root"
)
SET
(
MKLDNN_INC_DIR
"
${
MKLDNN_INSTALL_DIR
}
/include"
CACHE PATH
"mkldnn include directory."
FORCE
)
SET
(
MKLDNN_INSTALL_ROOT
"$ENV{HOME}"
)
ENDIF
()
SET
(
MKLDNN_INSTALL_DIR
"
${
MKLDNN_INSTALL_ROOT
}
/opt/paddle/third_party/mkldnn"
)
SET
(
MKLDNN_INCLUDE_DIR
"
${
MKLDNN_INSTALL_DIR
}
/include"
CACHE PATH
"mkldnn include directory."
FORCE
)
IF
(
WIN32
)
IF
(
WIN32 OR APPLE
)
MESSAGE
(
WARNING
"It is not supported compiling with mkldnn in windows Paddle yet."
MESSAGE
(
WARNING
"Force WITH_MKLDNN=OFF"
)
"Windows or Mac is not supported with MKLDNN in Paddle yet."
SET
(
WITH_MKLDNN OFF
)
"Force WITH_MKLDNN=OFF"
)
SET
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN in Windows and MacOS"
FORCE
)
return
()
return
()
ELSE
(
WIN32
)
ENDIF
()
SET
(
MKLDNN_LIBRARY
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
MESSAGE
(
STATUS
"Set
${
MKLDNN_INSTALL_DIR
}
/lib to runtime path"
)
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
SET
(
CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE
)
MESSAGE
(
STATUS
"Set
${
MKLDNN_INSTALL_DIR
}
/lib to runtime path"
)
#SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS
SET
(
CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
ENDIF
(
WIN32
)
INCLUDE_DIRECTORIES
(
${
MKLDNN_INC
LUDE
_DIR
}
)
INCLUDE_DIRECTORIES
(
${
MKLDNN_INC_DIR
}
)
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_MKLROOT
${
MKLML_ROOT
}
)
SET
(
MKLDNN_MKLROOT
${
MKLML_ROOT
}
)
SET
(
MKLDNN_IOMP_LIB
${
MKLML_IOMP_LIB
}
)
SET
(
MKLDNN_IOMP_LIB
${
MKLML_IOMP_LIB
}
)
SET
(
MKLDNN_IOMP_DIR
${
MKLML_LIB_DIR
}
)
SET
(
MKLDNN_IOMP_DIR
${
MKLML_LIB_DIR
}
)
MESSAGE
(
STATUS
"Build MKLDNN with
${
MKLDNN_MKLROOT
}
"
)
ENDIF
()
ENDIF
()
ExternalProject_Add
(
ExternalProject_Add
(
...
@@ -57,16 +53,15 @@ ExternalProject_Add(
...
@@ -57,16 +53,15 @@ ExternalProject_Add(
GIT_REPOSITORY
"https://github.com/01org/mkl-dnn.git"
GIT_REPOSITORY
"https://github.com/01org/mkl-dnn.git"
GIT_TAG
"v0.9"
GIT_TAG
"v0.9"
PREFIX
${
MKLDNN_SOURCES_DIR
}
PREFIX
${
MKLDNN_SOURCES_DIR
}
CONFIGURE_COMMAND mkdir -p <SOURCE_DIR>/build
BUILD_COMMAND cd <SOURCE_DIR>/build
&& cmake .. -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT=
${
MKLDNN_MKLROOT
}
&& $
(
MAKE
)
INSTALL_COMMAND cd <SOURCE_DIR>/build && $
(
MAKE
)
install
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DMKLROOT=
${
MKLDNN_MKLROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKLDNN_MKLROOT
}
)
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
RARY
}
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
MESSAGE
(
STATUS
"Mkldnn library:
${
MKLDNN_LIB
RARY
}
"
)
MESSAGE
(
STATUS
"Mkldnn library:
${
MKLDNN_LIB
}
"
)
LIST
(
APPEND external_project_dependencies mkldnn
)
LIST
(
APPEND external_project_dependencies mkldnn
)
cmake/external/mklml.cmake
浏览文件 @
927adb4a
...
@@ -16,19 +16,23 @@ IF(NOT ${WITH_MKLML})
...
@@ -16,19 +16,23 @@ IF(NOT ${WITH_MKLML})
return
()
return
()
ENDIF
(
NOT
${
WITH_MKLML
}
)
ENDIF
(
NOT
${
WITH_MKLML
}
)
IF
(
WIN32 OR APPLE
)
MESSAGE
(
WARNING
"Windows or Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF"
)
SET
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package in Windows and MacOS"
FORCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
INCLUDE
(
ExternalProject
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.20170
425
"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.20170
720
"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.9/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_URL
"https://github.com/01org/mkl-dnn/releases/download/v0.9/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"opt/paddle/third_party/mklml"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_INSTALL_ROOT
"
${
CMAKE_INSTALL_PREFIX
}
"
)
SET
(
MKLML_INSTALL_ROOT
"
${
THIRD_PARTY_PATH
}
/install"
)
IF
(
NOT
"$ENV{HOME}"
STREQUAL
"/root"
)
SET
(
MKLML_INSTALL_ROOT
"$ENV{HOME}"
)
ENDIF
()
SET
(
MKLML_INSTALL_DIR
${
MKLML_INSTALL_ROOT
}
/
${
MKLML_DST_DIR
}
)
SET
(
MKLML_INSTALL_DIR
${
MKLML_INSTALL_ROOT
}
/
${
MKLML_DST_DIR
}
)
SET
(
MKLML_ROOT
${
MKLML_INSTALL_DIR
}
/
${
MKLML_VER
}
)
SET
(
MKLML_ROOT
${
MKLML_INSTALL_DIR
}
/
${
MKLML_VER
}
)
SET
(
MKLML_INC_DIR
${
MKLML_ROOT
}
/include
)
SET
(
MKLML_INC_DIR
${
MKLML_ROOT
}
/include
)
...
...
cmake/generic.cmake
浏览文件 @
927adb4a
...
@@ -187,7 +187,13 @@ function(cc_library TARGET_NAME)
...
@@ -187,7 +187,13 @@ function(cc_library TARGET_NAME)
endif
()
endif
()
# cpplint code style
# cpplint code style
add_style_check_target
(
${
TARGET_NAME
}
${
cc_library_SRCS
}
)
foreach
(
source_file
${
cc_library_SRCS
}
)
string
(
REGEX REPLACE
"
\\
.[^.]*$"
""
source
${
source_file
}
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
source
}
.h
)
list
(
APPEND cc_library_HEADERS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
source
}
.h
)
endif
()
endforeach
()
add_style_check_target
(
${
TARGET_NAME
}
${
cc_library_SRCS
}
${
cc_library_HEADERS
}
)
else
(
cc_library_SRCS
)
else
(
cc_library_SRCS
)
if
(
cc_library_DEPS
)
if
(
cc_library_DEPS
)
...
@@ -239,6 +245,14 @@ function(nv_library TARGET_NAME)
...
@@ -239,6 +245,14 @@ function(nv_library TARGET_NAME)
add_dependencies
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
endif
()
endif
()
# cpplint code style
foreach
(
source_file
${
nv_library_SRCS
}
)
string
(
REGEX REPLACE
"
\\
.[^.]*$"
""
source
${
source_file
}
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
source
}
.h
)
list
(
APPEND cc_library_HEADERS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
source
}
.h
)
endif
()
endforeach
()
add_style_check_target
(
${
TARGET_NAME
}
${
nv_library_SRCS
}
${
nv_library_HEADERS
}
)
else
(
nv_library_SRCS
)
else
(
nv_library_SRCS
)
if
(
nv_library_DEPS
)
if
(
nv_library_DEPS
)
merge_static_libs
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
merge_static_libs
(
${
TARGET_NAME
}
${
nv_library_DEPS
}
)
...
...
paddle/CMakeLists.txt
浏览文件 @
927adb4a
...
@@ -15,7 +15,6 @@ if(Boost_FOUND)
...
@@ -15,7 +15,6 @@ if(Boost_FOUND)
add_subdirectory
(
platform
)
add_subdirectory
(
platform
)
add_subdirectory
(
framework
)
add_subdirectory
(
framework
)
add_subdirectory
(
operators
)
add_subdirectory
(
operators
)
add_subdirectory
(
pybind
)
endif
()
endif
()
if
(
WITH_C_API
)
if
(
WITH_C_API
)
...
...
paddle/cuda/src/hl_cuda_cudnn.cc
浏览文件 @
927adb4a
...
@@ -1022,6 +1022,15 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
...
@@ -1022,6 +1022,15 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real
alpha
=
1.0
f
;
real
alpha
=
1.0
f
;
real
beta
=
1.0
f
;
real
beta
=
1.0
f
;
cudnnBatchNormMode_t
mode
=
CUDNN_BATCHNORM_SPATIAL
;
cudnnBatchNormMode_t
mode
=
CUDNN_BATCHNORM_SPATIAL
;
int
batch_size
=
((
cudnn_tensor_descriptor
)
inputDesc
)
->
batch_size
;
if
(
batch_size
>
1024
&&
g_cudnn_lib_version
<
6000
)
{
LOG
(
INFO
)
<<
" To process current batch data with size "
<<
batch_size
<<
" (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<<
" If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<<
" just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<<
" current version "
<<
g_cudnn_lib_version
;
}
CHECK_CUDNN
(
CHECK_CUDNN
(
dynload
::
cudnnBatchNormalizationForwardInference
(
t_resource
.
cudnn_handle
,
dynload
::
cudnnBatchNormalizationForwardInference
(
t_resource
.
cudnn_handle
,
mode
,
mode
,
...
...
paddle/framework/CMakeLists.txt
浏览文件 @
927adb4a
...
@@ -31,8 +31,14 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.
...
@@ -31,8 +31,14 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.
add_custom_target
(
framework_py_proto_init ALL COMMAND
${
CMAKE_COMMAND
}
-E touch __init__.py
)
add_custom_target
(
framework_py_proto_init ALL COMMAND
${
CMAKE_COMMAND
}
-E touch __init__.py
)
add_dependencies
(
framework_py_proto framework_py_proto_init
)
add_dependencies
(
framework_py_proto framework_py_proto_init
)
cc_library
(
net SRCS net.cc DEPS op_registry
)
cc_library
(
backward SRCS backward.cc DEPS net_op
)
cc_test
(
net_op_test SRCS net_op_test.cc DEPS net
)
cc_library
(
backward SRCS backward.cc DEPS net
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward
)
cc_library
(
paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python backward
fc_op
sgd_op
add_op
mean_op
cross_entropy_op
recurrent_op
)
paddle/framework/backward.cc
浏览文件 @
927adb4a
...
@@ -14,8 +14,8 @@
...
@@ -14,8 +14,8 @@
#include "paddle/framework/backward.h"
#include "paddle/framework/backward.h"
#include <list>
#include <list>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -32,7 +32,7 @@ static bool AllInSet(const std::vector<std::string>& names,
...
@@ -32,7 +32,7 @@ static bool AllInSet(const std::vector<std::string>& names,
}
}
static
std
::
shared_ptr
<
OperatorBase
>
NOP
()
{
static
std
::
shared_ptr
<
OperatorBase
>
NOP
()
{
auto
net_op
=
std
::
make_shared
<
NetOp
>
();
auto
net_op
=
std
::
make_shared
<
operators
::
NetOp
>
();
net_op
->
type_
=
"@NOP@"
;
net_op
->
type_
=
"@NOP@"
;
net_op
->
CompleteAddOp
();
net_op
->
CompleteAddOp
();
return
net_op
;
return
net_op
;
...
@@ -42,9 +42,9 @@ static std::shared_ptr<OperatorBase> NOP() {
...
@@ -42,9 +42,9 @@ static std::shared_ptr<OperatorBase> NOP() {
//
//
// no_grad_names the gradient variable names without gradient calculating.
// no_grad_names the gradient variable names without gradient calculating.
//
//
// uniq_id is a unique index used inside recursively calling
BackwardRecursive.
// uniq_id is a unique index used inside recursively calling
//
use `uid = uniq_id++;` to get the unique index, and pass `uniq_id` through
//
BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and
// recursive calling.
//
pass `uniq_id` through
recursive calling.
//
//
// returns The backward operator. For simple situation, it is a simple
// returns The backward operator. For simple situation, it is a simple
// operator. For complex situation, it is a NetOp.
// operator. For complex situation, it is a NetOp.
...
@@ -64,8 +64,8 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
...
@@ -64,8 +64,8 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
return
NOP
();
return
NOP
();
}
}
// All output gradients of forwarding operator do not need to calculate.
Then
// All output gradients of forwarding operator do not need to calculate.
// all input gradients cannot be computed at all, and we put them into
//
Then
all input gradients cannot be computed at all, and we put them into
// `no_grad_names` set. Return an NOP.
// `no_grad_names` set. Return an NOP.
if
(
AllInSet
(
forwardOp
.
outputs_
,
OperatorBase
::
GRAD_VAR_SUFFIX
(),
if
(
AllInSet
(
forwardOp
.
outputs_
,
OperatorBase
::
GRAD_VAR_SUFFIX
(),
no_grad_names
))
{
no_grad_names
))
{
...
@@ -77,14 +77,14 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
...
@@ -77,14 +77,14 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
}
}
// Returned gradient network
// Returned gradient network
auto
net
=
std
::
make_shared
<
NetOp
>
();
auto
net
=
std
::
make_shared
<
operators
::
NetOp
>
();
if
(
forwardOp
.
IsNetOp
())
{
if
(
forwardOp
.
IsNetOp
())
{
// Because forwardOp is a net op, it can static_cast.
// Because forwardOp is a net op, it can static_cast.
auto
&
forwardNet
=
static_cast
<
const
NetOp
&>
(
forwardOp
);
auto
&
forwardNet
=
static_cast
<
const
operators
::
NetOp
&>
(
forwardOp
);
// Map from output gradient variable name to operator's indices in
backward
// Map from output gradient variable name to operator's indices in
// net. That operator generates that variable.
//
backward
net. That operator generates that variable.
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
size_t
>>
dup_output_ops
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
size_t
>>
dup_output_ops
;
size_t
local_op_id
=
0
;
size_t
local_op_id
=
0
;
...
@@ -168,6 +168,9 @@ std::shared_ptr<OperatorBase> Backward(
...
@@ -168,6 +168,9 @@ std::shared_ptr<OperatorBase> Backward(
std
::
unordered_set
<
std
::
string
>
no_grad_names
;
std
::
unordered_set
<
std
::
string
>
no_grad_names
;
no_grad_names
.
reserve
(
no_grad_vars
.
size
());
no_grad_names
.
reserve
(
no_grad_vars
.
size
());
no_grad_names
.
insert
(
OperatorBase
::
EMPTY_VAR_NAME
()
+
OperatorBase
::
GRAD_VAR_SUFFIX
());
for
(
auto
&
name
:
no_grad_vars
)
{
for
(
auto
&
name
:
no_grad_vars
)
{
no_grad_names
.
insert
(
name
+
OperatorBase
::
GRAD_VAR_SUFFIX
());
no_grad_names
.
insert
(
name
+
OperatorBase
::
GRAD_VAR_SUFFIX
());
}
}
...
...
paddle/framework/backward_test.cc
浏览文件 @
927adb4a
...
@@ -15,8 +15,9 @@
...
@@ -15,8 +15,9 @@
#include "paddle/framework/backward.h"
#include "paddle/framework/backward.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -70,7 +71,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
...
@@ -70,7 +71,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
}
}
};
};
class
FcOp
:
public
NetOp
{
class
FcOp
:
public
ops
::
NetOp
{
public:
public:
void
Init
()
override
{
void
Init
()
override
{
AddOp
(
OpRegistry
::
CreateOp
(
"mul"
,
{
Input
(
"X"
),
Input
(
"W"
)},
AddOp
(
OpRegistry
::
CreateOp
(
"mul"
,
{
Input
(
"X"
),
Input
(
"W"
)},
...
@@ -161,8 +162,8 @@ TEST(Backward, simple_op_grad) {
...
@@ -161,8 +162,8 @@ TEST(Backward, simple_op_grad) {
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
ASSERT_NE
(
fwd
,
nullptr
);
ASSERT_NE
(
fwd
,
nullptr
);
auto
gop
=
f
::
OpRegistry
::
CreateGradOp
(
*
fwd
);
auto
gop
=
f
::
OpRegistry
::
CreateGradOp
(
*
fwd
);
ASSERT_EQ
(
1
UL
,
gop
->
inputs_
.
size
());
ASSERT_EQ
(
4
UL
,
gop
->
inputs_
.
size
());
ASSERT_EQ
(
"Out"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
gop
->
inputs_
[
0
]);
ASSERT_EQ
(
f
::
OperatorBase
::
EMPTY_VAR_NAME
(),
gop
->
inputs_
[
0
]);
ASSERT_EQ
(
"rowwise_add_grad"
,
gop
->
type_
);
ASSERT_EQ
(
"rowwise_add_grad"
,
gop
->
type_
);
ASSERT_EQ
(
"X"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
gop
->
outputs_
[
0
]);
ASSERT_EQ
(
"X"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
gop
->
outputs_
[
0
]);
ASSERT_EQ
(
"b"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
gop
->
outputs_
[
1
]);
ASSERT_EQ
(
"b"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
gop
->
outputs_
[
1
]);
...
@@ -182,7 +183,8 @@ TEST(Backward, simple_op_not_need_grad) {
...
@@ -182,7 +183,8 @@ TEST(Backward, simple_op_not_need_grad) {
auto
no_input_gop
=
f
::
Backward
(
*
fwd
,
{
"X"
,
"b"
});
auto
no_input_gop
=
f
::
Backward
(
*
fwd
,
{
"X"
,
"b"
});
ASSERT_NE
(
no_input_gop
,
nullptr
);
ASSERT_NE
(
no_input_gop
,
nullptr
);
ASSERT_TRUE
(
no_input_gop
->
IsNetOp
());
ASSERT_TRUE
(
no_input_gop
->
IsNetOp
());
ASSERT_EQ
(
0UL
,
std
::
static_pointer_cast
<
f
::
NetOp
>
(
no_input_gop
)
->
ops_
.
size
());
ASSERT_EQ
(
0UL
,
std
::
static_pointer_cast
<
ops
::
NetOp
>
(
no_input_gop
)
->
ops_
.
size
());
}
}
TEST
(
Backward
,
net_fc_backward_normal
)
{
TEST
(
Backward
,
net_fc_backward_normal
)
{
...
@@ -191,7 +193,7 @@ TEST(Backward, net_fc_backward_normal) {
...
@@ -191,7 +193,7 @@ TEST(Backward, net_fc_backward_normal) {
ASSERT_NE
(
fwd
,
nullptr
);
ASSERT_NE
(
fwd
,
nullptr
);
std
::
shared_ptr
<
f
::
OperatorBase
>
gop
=
f
::
Backward
(
*
fwd
,
{});
std
::
shared_ptr
<
f
::
OperatorBase
>
gop
=
f
::
Backward
(
*
fwd
,
{});
ASSERT_TRUE
(
gop
->
IsNetOp
());
ASSERT_TRUE
(
gop
->
IsNetOp
());
auto
net
=
static_cast
<
f
::
NetOp
*>
(
gop
.
get
());
auto
net
=
static_cast
<
ops
::
NetOp
*>
(
gop
.
get
());
ASSERT_NO_THROW
(
net
->
DebugString
());
ASSERT_NO_THROW
(
net
->
DebugString
());
...
@@ -214,7 +216,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
...
@@ -214,7 +216,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
ASSERT_NE
(
fwd
,
nullptr
);
ASSERT_NE
(
fwd
,
nullptr
);
std
::
shared_ptr
<
f
::
OperatorBase
>
gop
=
f
::
Backward
(
*
fwd
,
{});
std
::
shared_ptr
<
f
::
OperatorBase
>
gop
=
f
::
Backward
(
*
fwd
,
{});
ASSERT_TRUE
(
gop
->
IsNetOp
());
ASSERT_TRUE
(
gop
->
IsNetOp
());
auto
net
=
static_cast
<
f
::
NetOp
*>
(
gop
.
get
());
auto
net
=
static_cast
<
ops
::
NetOp
*>
(
gop
.
get
());
ASSERT_NO_THROW
(
net
->
DebugString
());
ASSERT_NO_THROW
(
net
->
DebugString
());
...
@@ -228,7 +230,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
...
@@ -228,7 +230,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
}
}
TEST
(
Backward
,
net_input_of_network_not_need_grad
)
{
TEST
(
Backward
,
net_input_of_network_not_need_grad
)
{
f
::
NetOp
net
;
ops
::
NetOp
net
;
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"X"
,
"W1"
,
"b1"
},
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"X"
,
"W1"
,
"b1"
},
{
"mul_tmp_0"
,
"add_tmp_0"
,
"hidden0"
},
{}));
{
"mul_tmp_0"
,
"add_tmp_0"
,
"hidden0"
},
{}));
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"hidden0"
,
"W2"
,
"b2"
},
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"hidden0"
,
"W2"
,
"b2"
},
...
@@ -236,7 +238,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
...
@@ -236,7 +238,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
net
.
CompleteAddOp
();
net
.
CompleteAddOp
();
auto
bwd
=
Backward
(
net
,
{
"X"
});
// X@GRAD is not need.
auto
bwd
=
Backward
(
net
,
{
"X"
});
// X@GRAD is not need.
ASSERT_TRUE
(
bwd
->
IsNetOp
());
ASSERT_TRUE
(
bwd
->
IsNetOp
());
auto
bwd_net
=
static_cast
<
f
::
NetOp
*>
(
bwd
.
get
());
auto
bwd_net
=
static_cast
<
ops
::
NetOp
*>
(
bwd
.
get
());
std
::
unordered_set
<
std
::
string
>
all_output
=
std
::
unordered_set
<
std
::
string
>
(
std
::
unordered_set
<
std
::
string
>
all_output
=
std
::
unordered_set
<
std
::
string
>
(
bwd_net
->
outputs_
.
begin
(),
bwd_net
->
outputs_
.
end
());
bwd_net
->
outputs_
.
begin
(),
bwd_net
->
outputs_
.
end
());
...
@@ -253,7 +255,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
...
@@ -253,7 +255,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
ASSERT_EQ
(
2UL
,
bwd_net
->
ops_
.
size
());
ASSERT_EQ
(
2UL
,
bwd_net
->
ops_
.
size
());
ASSERT_TRUE
(
bwd_net
->
ops_
[
1
]
->
IsNetOp
());
ASSERT_TRUE
(
bwd_net
->
ops_
[
1
]
->
IsNetOp
());
auto
first_fc_grad
=
static_cast
<
f
::
NetOp
*>
(
bwd_net
->
ops_
[
1
].
get
());
auto
first_fc_grad
=
static_cast
<
ops
::
NetOp
*>
(
bwd_net
->
ops_
[
1
].
get
());
ASSERT_EQ
(
3UL
,
first_fc_grad
->
ops_
.
size
());
ASSERT_EQ
(
3UL
,
first_fc_grad
->
ops_
.
size
());
ASSERT_EQ
(
ASSERT_EQ
(
f
::
OperatorBase
::
EMPTY_VAR_NAME
(),
f
::
OperatorBase
::
EMPTY_VAR_NAME
(),
...
@@ -261,14 +263,14 @@ TEST(Backward, net_input_of_network_not_need_grad) {
...
@@ -261,14 +263,14 @@ TEST(Backward, net_input_of_network_not_need_grad) {
}
}
TEST
(
Backward
,
net_shared_weight
)
{
TEST
(
Backward
,
net_shared_weight
)
{
f
::
NetOp
net
;
ops
::
NetOp
net
;
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"mul"
,
{
"X"
,
"W"
},
{
"Out"
},
{}));
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"mul"
,
{
"X"
,
"W"
},
{
"Out"
},
{}));
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"mul"
,
{
"Out"
,
"W"
},
{
"FinalOut"
},
{}));
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"mul"
,
{
"Out"
,
"W"
},
{
"FinalOut"
},
{}));
net
.
CompleteAddOp
();
net
.
CompleteAddOp
();
auto
bwd
=
f
::
Backward
(
net
,
{});
auto
bwd
=
f
::
Backward
(
net
,
{});
ASSERT_TRUE
(
bwd
->
IsNetOp
());
ASSERT_TRUE
(
bwd
->
IsNetOp
());
auto
bwd_net
=
static_cast
<
f
::
NetOp
*>
(
bwd
.
get
());
auto
bwd_net
=
static_cast
<
ops
::
NetOp
*>
(
bwd
.
get
());
ASSERT_EQ
(
3UL
,
bwd_net
->
ops_
.
size
());
ASSERT_EQ
(
3UL
,
bwd_net
->
ops_
.
size
());
ASSERT_EQ
(
"add"
,
bwd_net
->
ops_
[
2
]
->
type_
);
ASSERT_EQ
(
"add"
,
bwd_net
->
ops_
[
2
]
->
type_
);
}
}
...
@@ -285,7 +287,7 @@ TEST(Backward, op_all_input_are_not_need) {
...
@@ -285,7 +287,7 @@ TEST(Backward, op_all_input_are_not_need) {
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"X"
,
"b"
});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"X"
,
"b"
});
ASSERT_TRUE
(
backward
->
IsNetOp
());
ASSERT_TRUE
(
backward
->
IsNetOp
());
auto
net
=
static_cast
<
f
::
NetOp
*>
(
backward
.
get
());
auto
net
=
static_cast
<
ops
::
NetOp
*>
(
backward
.
get
());
ASSERT_TRUE
(
net
->
ops_
.
empty
());
ASSERT_TRUE
(
net
->
ops_
.
empty
());
}
}
...
@@ -293,7 +295,7 @@ TEST(Backward, op_all_output_are_not_need) {
...
@@ -293,7 +295,7 @@ TEST(Backward, op_all_output_are_not_need) {
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"rowwise_add"
,
{
"X"
,
"b"
},
{
"Out"
},
{});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"Out"
});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"Out"
});
ASSERT_TRUE
(
backward
->
IsNetOp
());
ASSERT_TRUE
(
backward
->
IsNetOp
());
auto
net
=
static_cast
<
f
::
NetOp
*>
(
backward
.
get
());
auto
net
=
static_cast
<
ops
::
NetOp
*>
(
backward
.
get
());
ASSERT_TRUE
(
net
->
ops_
.
empty
());
ASSERT_TRUE
(
net
->
ops_
.
empty
());
}
}
...
@@ -301,7 +303,7 @@ TEST(Backward, op_part_of_output_are_not_need) {
...
@@ -301,7 +303,7 @@ TEST(Backward, op_part_of_output_are_not_need) {
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"many_output_op"
,
{
"X"
},
{
"Y"
,
"Z"
},
{});
auto
fwd
=
f
::
OpRegistry
::
CreateOp
(
"many_output_op"
,
{
"X"
},
{
"Y"
,
"Z"
},
{});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"Z"
});
auto
backward
=
f
::
Backward
(
*
fwd
,
{
"Z"
});
ASSERT_TRUE
(
backward
->
IsNetOp
());
ASSERT_TRUE
(
backward
->
IsNetOp
());
auto
net
=
static_cast
<
f
::
NetOp
*>
(
backward
.
get
());
auto
net
=
static_cast
<
ops
::
NetOp
*>
(
backward
.
get
());
ASSERT_EQ
(
net
->
ops_
.
size
(),
2UL
);
ASSERT_EQ
(
net
->
ops_
.
size
(),
2UL
);
auto
&
fill_zero
=
*
net
->
ops_
[
0
];
auto
&
fill_zero
=
*
net
->
ops_
[
0
];
...
@@ -341,7 +343,7 @@ TEST(Backward, op_part_of_input_are_not_need) {
...
@@ -341,7 +343,7 @@ TEST(Backward, op_part_of_input_are_not_need) {
}
}
TEST
(
Backward
,
linear_net_intermediate_variable_has_no_grad
)
{
TEST
(
Backward
,
linear_net_intermediate_variable_has_no_grad
)
{
f
::
NetOp
net
;
ops
::
NetOp
net
;
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"x1"
,
"w1"
,
"b1"
},
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"x1"
,
"w1"
,
"b1"
},
{
"mul_out1"
,
"add_out1"
,
"out1"
},
{}));
{
"mul_out1"
,
"add_out1"
,
"out1"
},
{}));
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"out1"
,
"w2"
,
"b2"
},
net
.
AddOp
(
f
::
OpRegistry
::
CreateOp
(
"fc"
,
{
"out1"
,
"w2"
,
"b2"
},
...
@@ -351,14 +353,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
...
@@ -351,14 +353,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
net
.
CompleteAddOp
();
net
.
CompleteAddOp
();
auto
backward
=
f
::
Backward
(
net
,
{
"mul_out2"
,
"tmp_out2"
,
"out2"
});
auto
backward
=
f
::
Backward
(
net
,
{
"mul_out2"
,
"tmp_out2"
,
"out2"
});
ASSERT_TRUE
(
backward
->
IsNetOp
());
ASSERT_TRUE
(
backward
->
IsNetOp
());
auto
bwd_net
=
static_cast
<
f
::
NetOp
*>
(
backward
.
get
());
auto
bwd_net
=
static_cast
<
ops
::
NetOp
*>
(
backward
.
get
());
ASSERT_EQ
(
bwd_net
->
ops_
.
size
(),
3UL
);
ASSERT_EQ
(
bwd_net
->
ops_
.
size
(),
3UL
);
auto
&
grad_fc
=
*
bwd_net
->
ops_
[
0
];
auto
&
grad_fc
=
*
bwd_net
->
ops_
[
0
];
EXPECT_EQ
(
grad_fc
.
inputs_
.
size
(),
EXPECT_EQ
(
grad_fc
.
inputs_
.
size
(),
3UL
/* external input number */
3UL
/* external input number */
+
1UL
/* external output number*/
+
1UL
/* external output number*/
+
1UL
/* number of gradient of external output*/
+
1UL
/* number of gradient of external output*/
-
1UL
/*ignoreGradient varable number*/
+
2U
/* internal variable number*/
);
+
2U
/* internal variable number*/
);
EXPECT_EQ
(
grad_fc
.
outputs_
.
size
(),
2UL
/* input number of mul*/
EXPECT_EQ
(
grad_fc
.
outputs_
.
size
(),
2UL
/* input number of mul*/
+
2UL
/* input number of rowwise_add */
+
2UL
/* input number of rowwise_add */
...
...
paddle/framework/ddim.h
浏览文件 @
927adb4a
...
@@ -25,18 +25,15 @@ limitations under the License. */
...
@@ -25,18 +25,15 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
{
typedef
boost
::
variant
<
Dim
<
1
>
,
Dim
<
2
>
,
Dim
<
3
>
,
Dim
<
4
>
,
Dim
<
5
>
,
Dim
<
6
>
,
Dim
<
7
>
,
Dim
<
8
>
,
Dim
<
9
>>
DDimVar
;
}
/**
/**
* \brief A dynamically sized dimension.
* \brief A dynamically sized dimension.
*
*
* The number of dimensions must be between [1, 9].
* The number of dimensions must be between [1, 9].
*/
*/
struct
DDim
{
struct
DDim
{
typedef
boost
::
variant
<
Dim
<
1
>
,
Dim
<
2
>
,
Dim
<
3
>
,
Dim
<
4
>
,
Dim
<
5
>
,
Dim
<
6
>
,
Dim
<
7
>
,
Dim
<
8
>
,
Dim
<
9
>>
DDimVar
;
DDimVar
var
;
DDimVar
var
;
DDim
()
:
var
(
Dim
<
1
>
())
{}
DDim
()
:
var
(
Dim
<
1
>
())
{}
...
...
paddle/framework/grad_op_builder.cc
浏览文件 @
927adb4a
...
@@ -8,107 +8,97 @@ You may obtain a copy of the License at
...
@@ -8,107 +8,97 @@ You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHO
UT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHO
pArgType::OUT WARRANTIES OR CONDITIONS OF ANY KOpArgType::IND, either
See the License for the specific language governing permissions and
express or implied. See the License for the specific language governing
limitations under the License. */
permissions and
limitations under the License. */
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
OperatorBase
*
GradOpBuilder
::
Build
()
{
class
OpRegistry
;
BuildOpInOutArgList
();
std
::
string
grad_op_type
=
OpRegistry
::
grad_ops
().
at
(
op_
.
type_
);
using
VarIndexMap
=
std
::
unordered_map
<
std
::
string
,
int
>
;
OperatorBase
*
grad_op
=
OpRegistry
::
op_creators
().
at
(
grad_op_type
)();
grad_op
->
type_
=
grad_op_type
;
CompleteGradOp
(
grad_op
);
return
grad_op
;
}
OpInOutArg
*
GradOpBuilder
::
BuildArg
(
const
VarProto
&
var
,
enum
class
OpArgType
{
IN
,
OUT
};
const
VarIndexMap
&
var_map
,
const
std
::
vector
<
int
>&
format
,
static
std
::
vector
<
int
>*
GetOpFormat
(
OperatorBase
*
op
,
const
OpArgType
&
type
)
{
InOutType
type
)
{
std
::
string
key
=
type
==
OpArgType
::
IN
?
"input_format"
:
"output_format"
;
int
idx
=
var_map
.
at
(
var
.
name
());
return
op
->
attrs_
.
count
(
key
)
int
begin_idx
=
format
.
empty
()
?
idx
:
format
.
at
(
idx
);
?
&
boost
::
get
<
std
::
vector
<
int
>>
(
op
->
attrs_
.
at
(
key
))
int
end_idx
=
format
.
empty
()
?
idx
+
1
:
format
.
at
(
idx
+
1
);
:
nullptr
;
return
new
OpInOutArg
(
var
.
name
(),
type
,
!
var
.
ignore_gradient
(),
begin_idx
,
end_idx
);
}
}
void
GradOpBuilder
::
BuildOpInOutArgList
()
{
static
const
std
::
vector
<
int
>*
GetOpFormat
(
const
OperatorBase
*
op
,
const
OpProto
&
op_proto
=
OpRegistry
::
protos
().
at
(
op_
.
type_
);
const
OpArgType
&
type
)
{
const
auto
&
var_map
=
*
(
OpRegistry
::
VarIndexMaps
().
at
(
op_
.
type_
));
std
::
string
key
=
type
==
OpArgType
::
IN
?
"input_format"
:
"output_format"
;
const
std
::
vector
<
int
>&
in_format
=
return
op
->
attrs_
.
count
(
key
)
op_
.
attrs_
.
count
(
"input_format"
)
?
&
boost
::
get
<
std
::
vector
<
int
>>
(
op
->
attrs_
.
at
(
key
))
?
op_
.
GetAttr
<
std
::
vector
<
int
>>
(
"input_format"
)
:
nullptr
;
:
std
::
vector
<
int
>
();
const
std
::
vector
<
int
>&
out_format
=
op_
.
attrs_
.
count
(
"output_format"
)
?
op_
.
GetAttr
<
std
::
vector
<
int
>>
(
"output_format"
)
:
std
::
vector
<
int
>
();
for
(
const
auto
&
var
:
op_proto
.
inputs
())
{
arg_list_
.
emplace_back
(
std
::
shared_ptr
<
OpInOutArg
>
(
BuildArg
(
var
,
var_map
,
in_format
,
IN
)));
}
for
(
const
auto
&
var
:
op_proto
.
outputs
())
{
arg_list_
.
emplace_back
(
std
::
shared_ptr
<
OpInOutArg
>
(
BuildArg
(
var
,
var_map
,
out_format
,
OUT
)));
}
}
}
void
GradOpBuilder
::
AddArgIntoGradOp
(
const
OpInOutArg
*
arg
,
static
void
TransOpArg
(
const
OperatorBase
*
src_op
,
OperatorBase
*
dst_op
,
std
::
vector
<
std
::
string
>&
in_out
,
const
OpArgType
&
src_type
,
const
OpArgType
&
dst_type
,
std
::
vector
<
int
>&
format
,
int
&
idx
,
bool
is_grad
)
{
VarIndexMap
*
varmap
,
int
&
idx
,
const
std
::
vector
<
std
::
string
>&
src_inout
=
bool
is_grad
)
const
{
src_type
==
OpArgType
::
IN
?
src_op
->
inputs_
:
src_op
->
outputs_
;
std
::
string
var_name
=
arg
->
proto_name_
;
const
std
::
vector
<
int
>*
src_format
=
GetOpFormat
(
src_op
,
src_type
);
if
(
is_grad
)
{
var_name
+=
OperatorBase
::
GRAD_VAR_SUFFIX
();
std
::
vector
<
std
::
string
>&
dst_inout
=
}
dst_type
==
OpArgType
::
IN
?
dst_op
->
inputs_
:
dst_op
->
outputs_
;
(
*
varmap
)[
var_name
]
=
idx
++
;
std
::
vector
<
int
>*
dst_format
=
GetOpFormat
(
dst_op
,
dst_type
);
size_t
pre_sz
=
in_out
.
size
();
const
OpProto
&
proto
=
OpRegistry
::
protos
().
at
(
src_op
->
type_
);
auto
base_it
=
arg
->
type_
==
IN
?
op_
.
inputs_
.
begin
()
:
op_
.
outputs_
.
begin
();
const
auto
&
src_arg_list
=
std
::
copy
(
base_it
+
arg
->
begin_idx_
,
base_it
+
arg
->
end_idx_
,
src_type
==
OpArgType
::
IN
?
proto
.
inputs
()
:
proto
.
outputs
();
std
::
back_inserter
(
in_out
));
if
(
is_grad
)
{
for
(
const
auto
&
arg
:
src_arg_list
)
{
for
(
size_t
i
=
pre_sz
;
i
<
in_out
.
size
();
++
i
)
{
std
::
string
src_name
=
arg
.
name
();
in_out
[
i
]
+=
OperatorBase
::
GRAD_VAR_SUFFIX
();
std
::
string
dst_name
=
is_grad
?
src_name
+
OperatorBase
::
GRAD_VAR_SUFFIX
()
:
src_name
;
(
*
dst_op
->
in_out_idxs_
)[
dst_name
]
=
idx
++
;
int
src_arg_idx
=
src_op
->
in_out_idxs_
->
at
(
src_name
);
int
src_begin
=
src_format
==
nullptr
?
src_arg_idx
:
src_format
->
at
(
src_arg_idx
);
int
src_end
=
src_format
==
nullptr
?
src_arg_idx
+
1
:
src_format
->
at
(
src_arg_idx
+
1
);
for
(
int
i
=
src_begin
;
i
<
src_end
;
++
i
)
{
std
::
string
s
=
is_grad
?
src_inout
[
i
]
+
OperatorBase
::
GRAD_VAR_SUFFIX
()
:
arg
.
ignore_gradient
()
?
OperatorBase
::
EMPTY_VAR_NAME
()
:
src_inout
[
i
];
dst_inout
.
emplace_back
(
s
);
}
if
(
dst_format
!=
nullptr
)
{
dst_format
->
push_back
(
dst_inout
.
size
());
}
}
}
}
format
.
push_back
(
in_out
.
size
());
}
}
void
GradOpBuilder
::
CompleteGradOp
(
OperatorBase
*
grad_op
)
const
{
OperatorBase
*
BuildGradOp
(
const
OperatorBase
*
op
)
{
grad_op
->
attrs_
=
op_
.
attrs_
;
std
::
string
grad_op_type
=
OpRegistry
::
grad_ops
().
at
(
op
->
type_
);
OperatorBase
*
grad_op
=
OpRegistry
::
op_creators
().
at
(
grad_op_type
)();
grad_op
->
type_
=
grad_op_type
;
grad_op
->
attrs_
=
op
->
attrs_
;
grad_op
->
attrs_
.
erase
(
"input_format"
);
grad_op
->
attrs_
.
erase
(
"input_format"
);
grad_op
->
attrs_
.
erase
(
"output_format"
);
grad_op
->
attrs_
.
erase
(
"output_format"
);
VarIndexMap
*
grad_varmap
=
new
VarIndexMap
();
if
(
GetOpFormat
(
op
,
OpArgType
::
IN
)
!=
nullptr
)
{
grad_op
->
attrs_
[
"output_format"
]
=
std
::
vector
<
int
>
({
0
});
}
if
(
GetOpFormat
(
op
,
OpArgType
::
IN
)
!=
nullptr
||
GetOpFormat
(
op
,
OpArgType
::
OUT
)
!=
nullptr
)
{
grad_op
->
attrs_
[
"input_format"
]
=
std
::
vector
<
int
>
({
0
});
}
grad_op
->
in_out_idxs_
.
reset
(
new
VarIndexMap
());
int
in_idx
=
0
;
int
in_idx
=
0
;
int
out_idx
=
0
;
int
out_idx
=
0
;
std
::
vector
<
int
>
in_format
({
0
});
TransOpArg
(
op
,
grad_op
,
OpArgType
::
IN
,
OpArgType
::
IN
,
in_idx
,
false
);
// I
std
::
vector
<
int
>
out_format
({
0
});
TransOpArg
(
op
,
grad_op
,
OpArgType
::
OUT
,
OpArgType
::
IN
,
in_idx
,
false
);
// G
for
(
const
auto
&
arg
:
arg_list_
)
{
TransOpArg
(
op
,
grad_op
,
OpArgType
::
OUT
,
OpArgType
::
IN
,
in_idx
,
true
);
// OG
// op_'s inputs_ and outputs_
TransOpArg
(
op
,
grad_op
,
OpArgType
::
IN
,
OpArgType
::
OUT
,
out_idx
,
true
);
// IG
if
(
arg
->
needed_in_grad_
)
{
return
grad_op
;
AddArgIntoGradOp
(
arg
.
get
(),
grad_op
->
inputs_
,
in_format
,
grad_varmap
,
in_idx
,
false
);
}
if
(
arg
->
type_
==
IN
)
{
// gradients of op_'s inputs_
AddArgIntoGradOp
(
arg
.
get
(),
grad_op
->
outputs_
,
out_format
,
grad_varmap
,
out_idx
,
true
);
}
else
{
// gradients of op_'s outputs_
AddArgIntoGradOp
(
arg
.
get
(),
grad_op
->
inputs_
,
in_format
,
grad_varmap
,
in_idx
,
true
);
}
}
grad_op
->
attrs_
[
"input_format"
]
=
in_format
;
grad_op
->
attrs_
[
"output_format"
]
=
out_format
;
grad_op
->
in_out_idxs_
.
reset
(
grad_varmap
);
}
}
}
// namespace framework
}
// namespace framework
...
...
paddle/framework/grad_op_builder.h
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma once
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/operator.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
class
OpRegistry
;
OperatorBase
*
BuildGradOp
(
const
OperatorBase
*
op
);
enum
InOutType
{
IN
,
OUT
};
struct
OpInOutArg
{
OpInOutArg
(
const
std
::
string
&
proto_name
,
const
InOutType
&
type
,
bool
needed_in_grad
,
size_t
begin_idx
,
size_t
end_idx
)
:
proto_name_
(
proto_name
),
type_
(
type
),
needed_in_grad_
(
needed_in_grad
),
begin_idx_
(
begin_idx
),
end_idx_
(
end_idx
)
{}
std
::
string
proto_name_
;
InOutType
type_
;
bool
needed_in_grad_
;
size_t
begin_idx_
;
size_t
end_idx_
;
};
class
GradOpBuilder
{
using
VarIndexMap
=
std
::
unordered_map
<
std
::
string
,
int
>
;
public:
GradOpBuilder
(
const
OperatorBase
&
op
)
:
op_
(
op
)
{}
OperatorBase
*
Build
();
private:
OpInOutArg
*
BuildArg
(
const
VarProto
&
var
,
const
VarIndexMap
&
var_map
,
const
std
::
vector
<
int
>&
format
,
InOutType
type
);
void
BuildOpInOutArgList
();
void
AddArgIntoGradOp
(
const
OpInOutArg
*
arg
,
std
::
vector
<
std
::
string
>&
in_out
,
std
::
vector
<
int
>&
format
,
VarIndexMap
*
varmap
,
int
&
idx
,
bool
is_grad
)
const
;
void
CompleteGradOp
(
OperatorBase
*
grad_op
)
const
;
const
OperatorBase
&
op_
;
std
::
vector
<
std
::
shared_ptr
<
OpInOutArg
>>
arg_list_
;
};
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/framework/grad_op_builder_test.cc
浏览文件 @
927adb4a
...
@@ -8,10 +8,49 @@ USE_OP(add_two);
...
@@ -8,10 +8,49 @@ USE_OP(add_two);
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
class
NOP
:
public
OperatorBase
{
public:
void
InferShape
(
const
Scope
&
scope
)
const
override
{}
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{}
};
class
MutiInOutOpMaker
:
public
OpProtoAndCheckerMaker
{
public:
MutiInOutOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"In1"
,
"a single input"
);
AddInput
(
"In2_mult"
,
"a multiple input"
).
SetMultiple
();
AddInput
(
"In3"
,
"another single input"
);
AddOutput
(
"Out1"
,
"a single output"
);
AddOutput
(
"Out2_mult"
,
"a multiple output"
).
SetMultiple
();
AddComment
(
"test op with multiple inputs and outputs"
);
}
};
class
IOIgnoredOpMaker
:
public
OpProtoAndCheckerMaker
{
public:
IOIgnoredOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"In1"
,
"a single input"
);
AddInput
(
"In2_mult"
,
"a multiple input"
).
SetMultiple
().
IgnoreGradient
();
AddInput
(
"In3_mult"
,
"another multiple input"
).
SetMultiple
();
AddOutput
(
"Out1_mult"
,
"a multiple output"
).
SetMultiple
();
AddOutput
(
"Out2"
,
"a single output"
).
IgnoreGradient
();
AddComment
(
"op with inputs and outputs ignored in gradient calculating"
);
}
};
}
// namespace framework
}
// namespace paddle
namespace
f
=
paddle
::
framework
;
TEST
(
GradOpBuilder
,
AddTwo
)
{
TEST
(
GradOpBuilder
,
AddTwo
)
{
std
::
shared_ptr
<
OperatorBase
>
add_op
(
std
::
shared_ptr
<
f
::
OperatorBase
>
add_op
(
OpRegistry
::
CreateOp
(
"add_two"
,
{
"x"
,
"y"
},
{
"out"
},
{}));
f
::
OpRegistry
::
CreateOp
(
"add_two"
,
{
"x"
,
"y"
},
{
"out"
},
{}));
std
::
shared_ptr
<
OperatorBase
>
grad_add_op
=
OpRegistry
::
CreateGradOp
(
*
add_op
);
std
::
shared_ptr
<
f
::
OperatorBase
>
grad_add_op
=
f
::
OpRegistry
::
CreateGradOp
(
*
add_op
);
EXPECT_EQ
(
static_cast
<
int
>
(
grad_add_op
->
inputs_
.
size
()),
4
);
EXPECT_EQ
(
static_cast
<
int
>
(
grad_add_op
->
inputs_
.
size
()),
4
);
EXPECT_EQ
(
static_cast
<
int
>
(
grad_add_op
->
outputs_
.
size
()),
2
);
EXPECT_EQ
(
static_cast
<
int
>
(
grad_add_op
->
outputs_
.
size
()),
2
);
EXPECT_EQ
(
grad_add_op
->
Input
(
"X"
),
"x"
);
EXPECT_EQ
(
grad_add_op
->
Input
(
"X"
),
"x"
);
...
@@ -22,5 +61,85 @@ TEST(GradOpBuilder, AddTwo) {
...
@@ -22,5 +61,85 @@ TEST(GradOpBuilder, AddTwo) {
EXPECT_EQ
(
grad_add_op
->
Output
(
"Y@GRAD"
),
"y@GRAD"
);
EXPECT_EQ
(
grad_add_op
->
Output
(
"Y@GRAD"
),
"y@GRAD"
);
}
}
}
// namespace framework
REGISTER_OP
(
mult_io
,
f
::
NOP
,
f
::
MutiInOutOpMaker
);
}
// namespace paddle
REGISTER_GRADIENT_OP
(
mult_io
,
mult_io_grad
,
f
::
NOP
);
\ No newline at end of file
REGISTER_OP
(
io_ignored
,
f
::
NOP
,
f
::
IOIgnoredOpMaker
);
REGISTER_GRADIENT_OP
(
io_ignored
,
io_ignored_grad
,
f
::
NOP
);
TEST
(
GradOpBuilder
,
MutiInOut
)
{
f
::
AttributeMap
attrs
{{
"input_format"
,
std
::
vector
<
int
>
{
0
,
1
,
4
,
5
}},
{
"output_format"
,
std
::
vector
<
int
>
{
0
,
1
,
3
}}};
std
::
shared_ptr
<
f
::
OperatorBase
>
test_op
(
f
::
OpRegistry
::
CreateOp
(
"mult_io"
,
{
"in1"
,
"in2_1"
,
"in2_2"
,
"in2_3"
,
"in3"
},
{
"out1"
,
"out2_1"
,
"out2_2"
},
attrs
));
std
::
shared_ptr
<
f
::
OperatorBase
>
grad_test_op
=
f
::
OpRegistry
::
CreateGradOp
(
*
test_op
);
ASSERT_EQ
(
grad_test_op
->
inputs_
.
size
(),
5UL
+
3UL
+
3UL
);
EXPECT_EQ
(
grad_test_op
->
Input
(
"In1"
),
"in1"
);
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"In2_mult"
),
std
::
vector
<
std
::
string
>
({
"in2_1"
,
"in2_2"
,
"in2_3"
}));
EXPECT_EQ
(
grad_test_op
->
Input
(
"In3"
),
"in3"
);
EXPECT_EQ
(
grad_test_op
->
Input
(
"Out1"
),
"out1"
);
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"Out2_mult"
),
std
::
vector
<
std
::
string
>
({
"out2_1"
,
"out2_2"
}));
EXPECT_EQ
(
grad_test_op
->
Input
(
"Out1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
"out1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
());
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"Out2_mult"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
std
::
vector
<
std
::
string
>
(
{
"out2_1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"out2_2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()}));
ASSERT_EQ
(
grad_test_op
->
outputs_
.
size
(),
5UL
);
EXPECT_EQ
(
grad_test_op
->
Output
(
"In1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
"in1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
());
EXPECT_EQ
(
grad_test_op
->
Outputs
(
"In2_mult"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
std
::
vector
<
std
::
string
>
({
"in2_1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"in2_2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"in2_3"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()}));
EXPECT_EQ
(
grad_test_op
->
Output
(
"In3"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
"in3"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
());
}
TEST
(
GradOpBuilder
,
IOIgnoredInGradient
)
{
f
::
AttributeMap
attrs
{{
"input_format"
,
std
::
vector
<
int
>
{
0
,
1
,
3
,
5
}},
{
"output_format"
,
std
::
vector
<
int
>
{
0
,
2
,
3
}}};
std
::
shared_ptr
<
f
::
OperatorBase
>
test_op
(
f
::
OpRegistry
::
CreateOp
(
"io_ignored"
,
{
"in1"
,
"in2_1"
,
"in2_2"
,
"in3_1"
,
"in3_2"
},
{
"out1_1"
,
"out1_2"
,
"out2"
},
attrs
));
std
::
shared_ptr
<
f
::
OperatorBase
>
grad_test_op
=
f
::
OpRegistry
::
CreateGradOp
(
*
test_op
);
// 'In2' and 'Out2' are ignored in gradient calculating
ASSERT_EQ
(
grad_test_op
->
inputs_
.
size
(),
5UL
+
3UL
+
3UL
);
EXPECT_EQ
(
grad_test_op
->
Input
(
"In1"
),
"in1"
);
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"In2_mult"
),
std
::
vector
<
std
::
string
>
({
f
::
OperatorBase
::
EMPTY_VAR_NAME
(),
f
::
OperatorBase
::
EMPTY_VAR_NAME
()}));
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"In3_mult"
),
std
::
vector
<
std
::
string
>
({
"in3_1"
,
"in3_2"
}));
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"Out1_mult"
),
std
::
vector
<
std
::
string
>
({
"out1_1"
,
"out1_2"
}));
EXPECT_EQ
(
grad_test_op
->
Input
(
"Out2"
),
f
::
OperatorBase
::
EMPTY_VAR_NAME
());
EXPECT_EQ
(
grad_test_op
->
Inputs
(
"Out1_mult"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
std
::
vector
<
std
::
string
>
(
{
"out1_1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"out1_2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()}));
EXPECT_EQ
(
grad_test_op
->
Input
(
"Out2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
"out2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
());
ASSERT_EQ
(
grad_test_op
->
outputs_
.
size
(),
5UL
);
EXPECT_EQ
(
grad_test_op
->
Output
(
"In1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
"in1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
());
EXPECT_EQ
(
grad_test_op
->
Outputs
(
"In2_mult"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
std
::
vector
<
std
::
string
>
({
"in2_1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"in2_2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()}));
EXPECT_EQ
(
grad_test_op
->
Outputs
(
"In3_mult"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()),
std
::
vector
<
std
::
string
>
({
"in3_1"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
(),
"in3_2"
+
f
::
OperatorBase
::
GRAD_VAR_SUFFIX
()}));
}
paddle/framework/op_registry.h
浏览文件 @
927adb4a
...
@@ -306,8 +306,7 @@ class OpRegistry {
...
@@ -306,8 +306,7 @@ class OpRegistry {
static
std
::
shared_ptr
<
OperatorBase
>
CreateGradOp
(
const
OperatorBase
&
op
)
{
static
std
::
shared_ptr
<
OperatorBase
>
CreateGradOp
(
const
OperatorBase
&
op
)
{
PADDLE_ENFORCE
(
!
op
.
IsNetOp
(),
PADDLE_ENFORCE
(
!
op
.
IsNetOp
(),
"Use framework::Backward to get backward ops"
);
"Use framework::Backward to get backward ops"
);
GradOpBuilder
builder
(
op
);
std
::
shared_ptr
<
OperatorBase
>
grad_op
(
BuildGradOp
(
&
op
));
std
::
shared_ptr
<
OperatorBase
>
grad_op
(
builder
.
Build
());
grad_op
->
Init
();
grad_op
->
Init
();
return
grad_op
;
return
grad_op
;
}
}
...
@@ -315,7 +314,7 @@ class OpRegistry {
...
@@ -315,7 +314,7 @@ class OpRegistry {
static
std
::
unordered_map
<
std
::
string
,
OpProto
>&
protos
()
{
static
std
::
unordered_map
<
std
::
string
,
OpProto
>&
protos
()
{
static
std
::
unordered_map
<
std
::
string
,
OpProto
>
protos_
;
static
std
::
unordered_map
<
std
::
string
,
OpProto
>
protos_
;
return
protos_
;
return
protos_
;
}
;
}
static
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
grad_ops
()
{
static
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
grad_ops
()
{
static
std
::
unordered_map
<
std
::
string
,
std
::
string
>
grad_ops_
;
static
std
::
unordered_map
<
std
::
string
,
std
::
string
>
grad_ops_
;
...
@@ -337,7 +336,7 @@ class OpRegistry {
...
@@ -337,7 +336,7 @@ class OpRegistry {
static
std
::
unordered_map
<
std
::
string
,
OpAttrChecker
>&
op_checkers
()
{
static
std
::
unordered_map
<
std
::
string
,
OpAttrChecker
>&
op_checkers
()
{
static
std
::
unordered_map
<
std
::
string
,
OpAttrChecker
>
op_checkers_
;
static
std
::
unordered_map
<
std
::
string
,
OpAttrChecker
>
op_checkers_
;
return
op_checkers_
;
return
op_checkers_
;
}
;
}
static
void
GenerateTempVariableName
(
OperatorBase
*
op
)
{
static
void
GenerateTempVariableName
(
OperatorBase
*
op
)
{
static
std
::
atomic
<
size_t
>
gUniqId
(
0UL
);
static
std
::
atomic
<
size_t
>
gUniqId
(
0UL
);
...
@@ -354,7 +353,7 @@ class OpRegistry {
...
@@ -354,7 +353,7 @@ class OpRegistry {
template
<
typename
OpType
,
typename
ProtoMakerType
>
template
<
typename
OpType
,
typename
ProtoMakerType
>
class
OpRegisterHelper
{
class
OpRegisterHelper
{
public:
public:
OpRegisterHelper
(
const
char
*
op_type
)
{
explicit
OpRegisterHelper
(
const
char
*
op_type
)
{
OpRegistry
::
RegisterOp
<
OpType
,
ProtoMakerType
>
(
op_type
);
OpRegistry
::
RegisterOp
<
OpType
,
ProtoMakerType
>
(
op_type
);
}
}
};
};
...
@@ -400,6 +399,14 @@ class GradOpRegisterHelper {
...
@@ -400,6 +399,14 @@ class GradOpRegisterHelper {
return 0; \
return 0; \
}
}
/**
* Macro to Forbid user register Gradient Operator.
*/
#define NO_GRADIENT(__op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_gradient_op__##__op_type##__op_type##_grad, \
"NO_GRADIENT must be in global namespace")
/**
/**
* Macro to Register OperatorKernel.
* Macro to Register OperatorKernel.
*/
*/
...
...
paddle/framework/operator.h
浏览文件 @
927adb4a
...
@@ -55,6 +55,10 @@ class OperatorBase {
...
@@ -55,6 +55,10 @@ class OperatorBase {
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
static
std
::
string
GRAD_VAR_SUFFIX
()
{
return
"@GRAD"
;
}
static
std
::
string
GRAD_VAR_SUFFIX
()
{
return
"@GRAD"
;
}
static
std
::
string
GRAD_VAR_NAME
(
const
std
::
string
&
name
)
{
return
name
+
GRAD_VAR_SUFFIX
();
}
/// Variables with this suffix are supposed to be filled up with zeros.
/// Variables with this suffix are supposed to be filled up with zeros.
static
std
::
string
ZERO_VAR_SUFFIX
()
{
return
"@ZERO"
;
}
static
std
::
string
ZERO_VAR_SUFFIX
()
{
return
"@ZERO"
;
}
...
@@ -280,7 +284,7 @@ class OperatorWithKernel : public OperatorBase {
...
@@ -280,7 +284,7 @@ class OperatorWithKernel : public OperatorBase {
platform
::
Place
place_
;
platform
::
Place
place_
;
OpKernelKey
()
=
default
;
OpKernelKey
()
=
default
;
OpKernelKey
(
const
platform
::
DeviceContext
&
dev_ctx
)
{
explicit
OpKernelKey
(
const
platform
::
DeviceContext
&
dev_ctx
)
{
place_
=
dev_ctx
.
GetPlace
();
place_
=
dev_ctx
.
GetPlace
();
}
}
...
...
paddle/
pybind
/pybind.cc
→
paddle/
framework
/pybind.cc
浏览文件 @
927adb4a
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -17,19 +17,19 @@ limitations under the License. */
...
@@ -17,19 +17,19 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/framework/backward.h"
#include "paddle/framework/backward.h"
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor_py.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "paddle/platform/place.h"
#include "paddle/pybind/tensor_bind.h"
#include "pybind11/numpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "pybind11/stl.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
namespace
pd
=
paddle
::
framework
;
USE_OP
(
add_two
);
USE_OP
(
add_two
);
USE_OP
(
onehot_cross_entropy
);
USE_OP
(
onehot_cross_entropy
);
...
@@ -41,17 +41,18 @@ USE_OP(sigmoid);
...
@@ -41,17 +41,18 @@ USE_OP(sigmoid);
USE_OP
(
softmax
);
USE_OP
(
softmax
);
USE_OP
(
rowwise_add
);
USE_OP
(
rowwise_add
);
USE_OP_WITHOUT_KERNEL
(
recurrent_op
);
USE_OP_WITHOUT_KERNEL
(
recurrent_op
);
namespace
paddle
{
namespace
framework
{
template
<
typename
ClassType
>
template
<
typename
ClassType
>
void
ExposeOperator
(
ClassType
&
m
)
{
void
ExposeOperator
(
ClassType
&
m
)
{
m
.
def
(
"infer_shape"
,
&
ClassType
::
type
::
InferShape
)
m
.
def
(
"infer_shape"
,
&
ClassType
::
type
::
InferShape
)
.
def
(
"run"
,
&
ClassType
::
type
::
Run
)
.
def
(
"run"
,
&
ClassType
::
type
::
Run
)
.
def
(
"type"
,
.
def
(
"type"
,
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
string
{
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
string
{
return
op
.
type_
;
return
op
.
type_
;
})
})
.
def
(
"outputs"
,
.
def
(
"outputs"
,
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
vector
<
std
::
string
>
{
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
vector
<
std
::
string
>
{
return
op
.
outputs_
;
return
op
.
outputs_
;
})
})
.
def
(
"__str__"
,
&
ClassType
::
type
::
DebugString
);
.
def
(
"__str__"
,
&
ClassType
::
type
::
DebugString
);
...
@@ -73,80 +74,81 @@ bool IsCompileGPU() {
...
@@ -73,80 +74,81 @@ bool IsCompileGPU() {
PYBIND11_PLUGIN
(
core
)
{
PYBIND11_PLUGIN
(
core
)
{
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
py
::
class_
<
pd
::
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
.
def_buffer
([](
pd
::
Tensor
&
self
)
->
py
::
buffer_info
{
.
def_buffer
(
return
paddle
::
pybind
::
CastToPyBuffer
(
self
);
[](
Tensor
&
self
)
->
py
::
buffer_info
{
return
CastToPyBuffer
(
self
);
})
})
.
def
(
"get_dims"
,
.
def
(
"get_dims"
,
[](
const
pd
::
Tensor
&
self
)
{
return
pd
::
vectorize
(
self
.
dims
());
})
[](
const
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"set_dims"
,
.
def
(
"set_dims"
,
[](
pd
::
Tensor
&
self
,
const
std
::
vector
<
int
>&
dim
)
{
[](
Tensor
&
self
,
const
std
::
vector
<
int
>
&
dim
)
{
self
.
Resize
(
pd
::
make_ddim
(
dim
));
self
.
Resize
(
make_ddim
(
dim
));
})
})
.
def
(
"alloc_float"
,
.
def
(
"alloc_float"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
[](
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
self
.
mutable_data
<
float
>
(
place
);
})
})
.
def
(
"alloc_float"
,
.
def
(
"alloc_float"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
[](
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
self
.
mutable_data
<
float
>
(
place
);
})
})
.
def
(
"alloc_int"
,
.
def
(
"alloc_int"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
[](
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
place
);
self
.
mutable_data
<
int
>
(
place
);
})
})
.
def
(
"alloc_int"
,
.
def
(
"alloc_int"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
[](
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
place
);
self
.
mutable_data
<
int
>
(
place
);
})
})
.
def
(
"set"
,
paddle
::
pybind
::
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
#ifndef PADDLE_ONLY_CPU
#ifndef PADDLE_ONLY_CPU
.
def
(
"set"
,
paddle
::
pybind
::
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
#endif
#endif
.
def
(
"shape"
,
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
[](
pd
::
Tensor
&
self
)
{
return
pd
::
vectorize
(
self
.
dims
());
});
.
def
(
"set_float_element"
,
[](
Tensor
&
self
,
size_t
offset
,
float
f
)
{
// TODO(yuyang18): Only support GPU now.
self
.
data
<
float
>
()[
offset
]
=
f
;
})
.
def
(
"get_float_element"
,
[](
Tensor
&
self
,
size_t
offset
)
->
float
{
// TODO(yuyang18): Only support GPU now.
return
self
.
data
<
float
>
()[
offset
];
});
py
::
class_
<
pd
::
Variable
>
(
m
,
"Variable"
,
R"DOC(Variable Class.
py
::
class_
<
Variable
>
(
m
,
"Variable"
,
R"DOC(Variable Class.
All parameter, weight, gradient are variables in Paddle.
All parameter, weight, gradient are variables in Paddle.
)DOC"
)
)DOC"
)
.
def
(
"is_int"
,
[](
const
pd
::
Variable
&
var
)
{
return
var
.
IsType
<
int
>
();
})
.
def
(
"is_int"
,
[](
const
Variable
&
var
)
{
return
var
.
IsType
<
int
>
();
})
.
def
(
"set_int"
,
.
def
(
"set_int"
,
[](
pd
::
Variable
&
var
,
int
val
)
->
void
{
[](
Variable
&
var
,
int
val
)
->
void
{
*
var
.
GetMutable
<
int
>
()
=
val
;
})
*
var
.
GetMutable
<
int
>
()
=
val
;
.
def
(
"get_int"
,
[](
const
Variable
&
var
)
->
int
{
return
var
.
Get
<
int
>
();
})
})
.
def
(
"get_int"
,
[](
const
pd
::
Variable
&
var
)
->
int
{
return
var
.
Get
<
int
>
();
})
.
def
(
"get_tensor"
,
.
def
(
"get_tensor"
,
[](
pd
::
Variable
&
self
)
->
pd
::
Tensor
*
{
[](
Variable
&
self
)
->
Tensor
*
{
return
self
.
GetMutable
<
Tensor
>
();
},
return
self
.
GetMutable
<
pd
::
Tensor
>
();
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"get_net"
,
.
def
(
"get_net"
,
[](
pd
::
Variable
&
self
)
->
pd
::
NetOp
*
{
[](
Variable
&
self
)
->
ops
::
NetOp
*
{
return
self
.
GetMutable
<
pd
::
NetOp
>
();
return
self
.
GetMutable
<
ops
::
NetOp
>
();
},
},
py
::
return_value_policy
::
reference
);
py
::
return_value_policy
::
reference
);
py
::
class_
<
pd
::
Scope
>
(
m
,
"Scope"
,
""
)
py
::
class_
<
Scope
>
(
m
,
"Scope"
,
""
)
.
def
(
"new_var"
,
.
def
(
"new_var"
,
[](
pd
::
Scope
&
self
,
const
std
::
string
&
name
)
->
pd
::
Variable
*
{
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
return
self
.
NewVar
(
name
);
return
self
.
NewVar
(
name
);
},
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
pd
::
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<>
())
.
def
(
"new_scope"
,
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
[](
pd
::
Scope
&
self
)
->
pd
::
Scope
*
{
return
&
self
.
NewScope
();
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"drop_kids"
,
&
pd
::
Scope
::
DropKids
);
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python.
//! Python str. If you want a str object, you should cast them in Python.
m
.
def
(
"get_all_op_protos"
,
[]()
->
std
::
vector
<
py
::
bytes
>
{
m
.
def
(
"get_all_op_protos"
,
[]()
->
std
::
vector
<
py
::
bytes
>
{
auto
&
protos
=
pd
::
OpRegistry
::
protos
();
auto
&
protos
=
OpRegistry
::
protos
();
std
::
vector
<
py
::
bytes
>
ret_values
;
std
::
vector
<
py
::
bytes
>
ret_values
;
for
(
auto
it
=
protos
.
begin
();
it
!=
protos
.
end
();
++
it
)
{
for
(
auto
it
=
protos
.
begin
();
it
!=
protos
.
end
();
++
it
)
{
PADDLE_ENFORCE
(
it
->
second
.
IsInitialized
(),
PADDLE_ENFORCE
(
it
->
second
.
IsInitialized
(),
...
@@ -161,8 +163,8 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -161,8 +163,8 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def_submodule
(
m
.
def_submodule
(
"var_names"
,
"var_names"
,
"The module will return special predefined variable name in Paddle"
)
"The module will return special predefined variable name in Paddle"
)
.
def
(
"empty"
,
pd
::
OperatorBase
::
EMPTY_VAR_NAME
)
.
def
(
"empty"
,
OperatorBase
::
EMPTY_VAR_NAME
)
.
def
(
"temp"
,
pd
::
OperatorBase
::
TMP_VAR_NAME
);
.
def
(
"temp"
,
OperatorBase
::
TMP_VAR_NAME
);
// clang-format off
// clang-format off
py
::
class_
<
paddle
::
platform
::
DeviceContext
>
(
m
,
"DeviceContext"
)
py
::
class_
<
paddle
::
platform
::
DeviceContext
>
(
m
,
"DeviceContext"
)
.
def_static
(
"create"
,
.
def_static
(
"create"
,
...
@@ -185,43 +187,45 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -185,43 +187,45 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
paddle
::
platform
::
CPUPlace
>
(
m
,
"CPUPlace"
).
def
(
py
::
init
<>
());
py
::
class_
<
paddle
::
platform
::
CPUPlace
>
(
m
,
"CPUPlace"
).
def
(
py
::
init
<>
());
py
::
class_
<
pd
::
OperatorBase
,
std
::
shared_ptr
<
pd
::
OperatorBase
>>
operator_base
(
py
::
class_
<
OperatorBase
,
std
::
shared_ptr
<
OperatorBase
>>
operator_base
(
m
,
"Operator"
);
m
,
"Operator"
);
operator_base
.
def_static
(
"create"
,
[](
py
::
bytes
protobin
)
{
operator_base
.
def_static
(
"create"
,
[](
py
::
bytes
protobin
)
{
pd
::
OpDesc
desc
;
OpDesc
desc
;
PADDLE_ENFORCE
(
desc
.
ParsePartialFromString
(
protobin
),
PADDLE_ENFORCE
(
desc
.
ParsePartialFromString
(
protobin
),
"Cannot parse user input to OpDesc"
);
"Cannot parse user input to OpDesc"
);
PADDLE_ENFORCE
(
desc
.
IsInitialized
(),
PADDLE_ENFORCE
(
desc
.
IsInitialized
(),
"User OpDesc is not initialized, reason %s"
,
"User OpDesc is not initialized, reason %s"
,
desc
.
InitializationErrorString
());
desc
.
InitializationErrorString
());
return
pd
::
OpRegistry
::
CreateOp
(
desc
);
return
OpRegistry
::
CreateOp
(
desc
);
});
});
operator_base
.
def
(
"backward"
,
operator_base
.
def
(
"backward"
,
[](
const
pd
::
OperatorBase
&
forwardOp
,
[](
const
OperatorBase
&
forwardOp
,
const
std
::
unordered_set
<
std
::
string
>
&
no_grad_vars
)
{
const
std
::
unordered_set
<
std
::
string
>
&
no_grad_vars
)
{
return
pd
::
Backward
(
forwardOp
,
no_grad_vars
);
return
Backward
(
forwardOp
,
no_grad_vars
);
});
});
ExposeOperator
(
operator_base
);
ExposeOperator
(
operator_base
);
py
::
class_
<
pd
::
NetOp
,
std
::
shared_ptr
<
pd
::
NetOp
>>
net
(
m
,
"Net"
);
py
::
class_
<
ops
::
NetOp
,
std
::
shared_ptr
<
ops
::
NetOp
>>
net
(
m
,
"Net"
);
net
.
def_static
(
"create"
,
net
.
def_static
(
"create"
,
[]()
->
std
::
shared_ptr
<
pd
::
NetOp
>
{
[]()
->
std
::
shared_ptr
<
ops
::
NetOp
>
{
auto
retv
=
std
::
make_shared
<
pd
::
NetOp
>
();
auto
retv
=
std
::
make_shared
<
ops
::
NetOp
>
();
retv
->
type_
=
"plain_net"
;
retv
->
type_
=
"plain_net"
;
return
retv
;
return
retv
;
})
})
.
def
(
"add_op"
,
&
pd
::
NetOp
::
AddOp
)
.
def
(
"add_op"
,
&
ops
::
NetOp
::
AddOp
)
.
def
(
"add_op"
,
.
def
(
[](
pd
::
NetOp
&
self
,
const
std
::
shared_ptr
<
pd
::
NetOp
>&
net
)
->
void
{
"add_op"
,
self
.
AddOp
(
std
::
static_pointer_cast
<
pd
::
OperatorBase
>
(
net
));
[](
ops
::
NetOp
&
self
,
const
std
::
shared_ptr
<
ops
::
NetOp
>
&
net
)
->
void
{
})
self
.
AddOp
(
std
::
static_pointer_cast
<
OperatorBase
>
(
net
));
.
def
(
"complete_add_op"
,
&
pd
::
NetOp
::
CompleteAddOp
)
})
.
def
(
"complete_add_op"
,
&
ops
::
NetOp
::
CompleteAddOp
)
.
def
(
"complete_add_op"
,
.
def
(
"complete_add_op"
,
[](
std
::
shared_ptr
<
pd
::
NetOp
>&
self
)
{
self
->
CompleteAddOp
();
});
[](
std
::
shared_ptr
<
ops
::
NetOp
>
&
self
)
{
self
->
CompleteAddOp
();
});
ExposeOperator
(
net
);
ExposeOperator
(
net
);
m
.
def
(
"unique_integer"
,
UniqueIntegerGenerator
);
m
.
def
(
"unique_integer"
,
UniqueIntegerGenerator
);
...
@@ -230,3 +234,5 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -230,3 +234,5 @@ All parameter, weight, gradient are variables in Paddle.
return
m
.
ptr
();
return
m
.
ptr
();
}
}
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor.h
浏览文件 @
927adb4a
...
@@ -26,19 +26,17 @@ limitations under the License. */
...
@@ -26,19 +26,17 @@ limitations under the License. */
#include "unsupported/Eigen/CXX11/Tensor"
#include "unsupported/Eigen/CXX11/Tensor"
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
details
{
// forward declare
template
<
bool
less
,
size_t
i
,
typename
...
args
>
struct
CastToPyBufferImpl
;
}
// namespace details
}
// namespace pybind
namespace
framework
{
namespace
framework
{
namespace
details
{
template
<
bool
less
,
size_t
i
,
typename
...
args
>
struct
CastToPyBufferImpl
;
}
class
Tensor
{
class
Tensor
{
public:
public:
template
<
bool
less
,
size_t
i
,
typename
...
args
>
template
<
bool
less
,
size_t
i
,
typename
...
args
>
friend
struct
paddle
::
pybind
::
details
::
CastToPyBufferImpl
;
friend
struct
details
::
CastToPyBufferImpl
;
template
<
typename
T
,
size_t
D
,
int
MajorType
,
typename
IndexType
>
template
<
typename
T
,
size_t
D
,
int
MajorType
,
typename
IndexType
>
friend
struct
EigenTensor
;
friend
struct
EigenTensor
;
...
@@ -167,4 +165,4 @@ class Tensor {
...
@@ -167,4 +165,4 @@ class Tensor {
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
#include "paddle/framework/
detail/tensor-in
l.h"
#include "paddle/framework/
tensor_imp
l.h"
paddle/framework/
detail/tensor-in
l.h
→
paddle/framework/
tensor_imp
l.h
浏览文件 @
927adb4a
文件已移动
paddle/
pybind/tensor_bind
.h
→
paddle/
framework/tensor_py
.h
浏览文件 @
927adb4a
...
@@ -23,7 +23,7 @@ namespace py = pybind11;
...
@@ -23,7 +23,7 @@ namespace py = pybind11;
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
framework
{
namespace
details
{
namespace
details
{
...
@@ -63,11 +63,8 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
...
@@ -63,11 +63,8 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
}
}
return
py
::
buffer_info
(
return
py
::
buffer_info
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
dst_tensor
.
holder_
->
place
()),
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
dst_tensor
.
holder_
->
place
()),
sizeof
(
CUR_TYPE
),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
else
{
}
else
{
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
...
@@ -110,8 +107,8 @@ void PyCUDATensorSetFromArray(
...
@@ -110,8 +107,8 @@ void PyCUDATensorSetFromArray(
self
.
Resize
(
framework
::
make_ddim
(
dims
));
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
T
>
(
place
);
auto
*
dst
=
self
.
mutable_data
<
T
>
(
place
);
paddle
::
platform
::
GpuMemcpySync
(
paddle
::
platform
::
GpuMemcpySync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
cudaMemcpyHostToDevice
);
cudaMemcpyHostToDevice
);
}
}
#endif
#endif
...
...
paddle/function/ConvOp.h
浏览文件 @
927adb4a
...
@@ -109,6 +109,13 @@ protected:
...
@@ -109,6 +109,13 @@ protected:
return
filter
[
filter
.
ndims
()
-
1
];
return
filter
[
filter
.
ndims
()
-
1
];
}
}
// determine whether im2col needs to be performed
inline
bool
isNeedIm2col
(
const
TensorShape
&
filter
)
const
{
return
!
(
getFilterHeight
(
filter
)
==
1
&&
getFilterWidth
(
filter
)
==
1
&&
strideH
()
==
1
&&
strideW
()
==
1
&&
paddingH
()
==
0
&&
paddingW
()
==
0
);
}
std
::
vector
<
size_t
>
strides_
;
std
::
vector
<
size_t
>
strides_
;
std
::
vector
<
size_t
>
paddings_
;
std
::
vector
<
size_t
>
paddings_
;
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
927adb4a
...
@@ -66,16 +66,23 @@ public:
...
@@ -66,16 +66,23 @@ public:
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
TensorShape
colShape
;
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
GemmFunctor
<
Device
,
real
>
gemm
;
GemmFunctor
<
Device
,
real
>
gemm
;
...
@@ -86,15 +93,18 @@ public:
...
@@ -86,15 +93,18 @@ public:
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
im2col
(
inputData
+
g
*
inputOffset
,
if
(
needIm2col
)
{
imShape
,
im2col
(
inputData
+
g
*
inputOffset
,
colData
,
imShape
,
colShape
,
colData
,
strideH
(),
colShape
,
strideW
(),
strideH
(),
paddingH
(),
strideW
(),
paddingW
());
paddingH
(),
paddingW
());
}
else
{
colData
=
inputData
+
g
*
inputOffset
;
}
int
M
=
outputChannels
/
groups_
;
int
M
=
outputChannels
/
groups_
;
int
N
=
outputHeight
*
outputWidth
;
int
N
=
outputHeight
*
outputWidth
;
int
K
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
int
K
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
...
@@ -159,19 +169,27 @@ public:
...
@@ -159,19 +169,27 @@ public:
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
inputGrad
=
outputs
[
0
].
data
<
real
>
();
real
*
inputGrad
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
TensorShape
colShape
;
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Col2ImFunctor
<
kCFO
,
Device
,
real
>
col2im
;
Col2ImFunctor
<
kCFO
,
Device
,
real
>
col2im
;
GemmFunctor
<
Device
,
real
>
gemm
;
GemmFunctor
<
Device
,
real
>
gemm
;
size_t
inputOffset
=
imShape
.
getElements
();
size_t
inputOffset
=
imShape
.
getElements
();
size_t
outputOffset
=
size_t
outputOffset
=
(
outputChannels
/
groups_
)
*
outputHeight
*
outputWidth
;
(
outputChannels
/
groups_
)
*
outputHeight
*
outputWidth
;
...
@@ -182,6 +200,11 @@ public:
...
@@ -182,6 +200,11 @@ public:
int
K
=
outputChannels
/
groups_
;
int
K
=
outputChannels
/
groups_
;
int
N
=
outputHeight
*
outputWidth
;
int
N
=
outputHeight
*
outputWidth
;
int
M
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
int
M
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
real
scale
=
0.0
f
;
if
(
!
needIm2col
)
{
colData
=
inputGrad
+
g
*
inputOffset
;
scale
=
1.0
f
;
}
gemm
(
CblasTrans
,
gemm
(
CblasTrans
,
CblasNoTrans
,
CblasNoTrans
,
M
,
M
,
...
@@ -192,17 +215,19 @@ public:
...
@@ -192,17 +215,19 @@ public:
M
,
M
,
outputGrad
+
g
*
outputOffset
,
outputGrad
+
g
*
outputOffset
,
N
,
N
,
0.0
f
,
scale
,
colData
,
colData
,
N
);
N
);
col2im
(
inputGrad
+
g
*
inputOffset
,
if
(
needIm2col
)
{
imShape
,
col2im
(
inputGrad
+
g
*
inputOffset
,
colData
,
imShape
,
colShape
,
colData
,
strideH
(),
colShape
,
strideW
(),
strideH
(),
paddingH
(),
strideW
(),
paddingW
());
paddingH
(),
paddingW
());
}
}
}
inputGrad
+=
inputChannels
*
inputHeight
*
inputWidth
;
inputGrad
+=
inputChannels
*
inputHeight
*
inputWidth
;
outputGrad
+=
outputChannels
*
outputHeight
*
outputWidth
;
outputGrad
+=
outputChannels
*
outputHeight
*
outputWidth
;
...
@@ -255,16 +280,23 @@ public:
...
@@ -255,16 +280,23 @@ public:
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
inputData
=
inputs
[
1
].
data
<
real
>
();
real
*
inputData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
TensorShape
colShape
;
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
GemmFunctor
<
Device
,
real
>
gemm
;
GemmFunctor
<
Device
,
real
>
gemm
;
...
@@ -274,15 +306,18 @@ public:
...
@@ -274,15 +306,18 @@ public:
size_t
filterOffset
=
filter
.
getElements
()
/
groups_
;
size_t
filterOffset
=
filter
.
getElements
()
/
groups_
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
im2col
(
inputData
+
g
*
inputOffset
,
if
(
needIm2col
)
{
imShape
,
im2col
(
inputData
+
g
*
inputOffset
,
colData
,
imShape
,
colShape
,
colData
,
strideH
(),
colShape
,
strideW
(),
strideH
(),
paddingH
(),
strideW
(),
paddingW
());
paddingH
(),
paddingW
());
}
else
{
colData
=
inputData
+
g
*
inputOffset
;
}
int
M
=
outputChannels
/
groups_
;
int
M
=
outputChannels
/
groups_
;
int
K
=
outputHeight
*
outputWidth
;
int
K
=
outputHeight
*
outputWidth
;
int
N
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
int
N
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
927adb4a
...
@@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() {
...
@@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() {
size_t
numSequences
=
getGenBatchSize
();
size_t
numSequences
=
getGenBatchSize
();
resizeBootFrame
(
numSequences
);
resizeBootFrame
(
numSequences
);
// We create only two sub-network in generation for alternate use.
// We create only two sub-network in generation, one stores states of all
// Thus, we can reduce total memory of output_ in layer forward.
// layers in previous time step and the other storing the states at current
// time step.
resizeOrCreateFrames
(
2
);
resizeOrCreateFrames
(
2
);
// outFrameLines_.size() > 1UL
// outFrameLines_.size() > 1UL
...
@@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() {
...
@@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() {
// init outArg
// init outArg
size_t
resultNum
=
generator_
.
config
.
num_results_per_sample
();
size_t
resultNum
=
generator_
.
config
.
num_results_per_sample
();
IVector
::
resizeOrCreate
(
size_t
maxGenWordCount
=
generator_
.
outArg
.
ids
,
generator_
.
config
.
max_num_frames
()
*
numSequences
*
resultNum
;
generator_
.
config
.
max_num_frames
()
*
numSequences
*
resultNum
,
IVector
::
resizeOrCreate
(
generator_
.
outArg
.
ids
,
maxGenWordCount
,
false
);
false
);
if
(
resultNum
>
1
)
{
if
(
resultNum
>
1
)
{
CHECK_LE
(
resultNum
,
static_cast
<
size_t
>
(
generator_
.
config
.
beam_size
()));
CHECK_LE
(
resultNum
,
static_cast
<
size_t
>
(
generator_
.
config
.
beam_size
()));
Matrix
::
resizeOrCreate
(
generator_
.
outArg
.
in
,
Matrix
::
resizeOrCreate
(
generator_
.
outArg
.
in
,
...
@@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() {
...
@@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() {
/* width */
resultNum
,
/* width */
resultNum
,
false
,
false
,
/* useGpu */
false
);
/* useGpu */
false
);
Matrix
::
resizeOrCreate
(
generator_
.
outArg
.
value
,
/* height */
maxGenWordCount
,
/* width */
1
,
false
,
/* useGpu */
false
);
}
}
ICpuGpuVector
::
resizeOrCreate
(
generator_
.
outArg
.
sequenceStartPositions
,
ICpuGpuVector
::
resizeOrCreate
(
generator_
.
outArg
.
sequenceStartPositions
,
numSequences
+
1
,
numSequences
+
1
,
...
@@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
...
@@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
starts
[
0
]
=
0
;
starts
[
0
]
=
0
;
if
(
numResults
>
1
)
{
if
(
numResults
>
1
)
{
real
*
probs
=
generator_
.
outArg
.
in
->
getData
();
real
*
probs
=
generator_
.
outArg
.
in
->
getData
();
real
*
idsProb
=
generator_
.
outArg
.
value
->
getData
();
size_t
curPos
=
0
;
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
Path
&
path
=
finalPaths_
[
i
][
j
];
Path
&
path
=
finalPaths_
[
i
][
j
];
generator_
.
ids
.
push_back
(
path
.
ids
.
size
());
// sequence size
size_t
genLen
=
path
.
ids
.
size
();
generator_
.
ids
.
push_back
(
genLen
);
// sequence size
generator_
.
ids
.
insert
(
generator_
.
ids
.
insert
(
generator_
.
ids
.
end
(),
path
.
ids
.
begin
(),
path
.
ids
.
end
());
generator_
.
ids
.
end
(),
path
.
ids
.
begin
(),
path
.
ids
.
end
());
generator_
.
ids
.
push_back
(
-
1
);
// end of sequence
generator_
.
ids
.
push_back
(
-
1
);
// end of sequence
memcpy
(
idsProb
+
curPos
,
path
.
idsProb
.
data
(),
sizeof
(
real
)
*
genLen
);
curPos
+=
genLen
;
idsProb
[
curPos
++
]
=
-
1.0
;
probs
[
i
*
numResults
+
j
]
=
path
.
logProb
;
probs
[
i
*
numResults
+
j
]
=
path
.
logProb
;
if
(
!
j
&&
dataArgsSize_
)
{
if
(
!
j
&&
dataArgsSize_
)
{
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
927adb4a
...
@@ -189,6 +189,11 @@ public:
...
@@ -189,6 +189,11 @@ public:
*/
*/
std
::
vector
<
int
>
ids
;
std
::
vector
<
int
>
ids
;
/**
* @brief idsProb, log probability of each generated words.
*/
std
::
vector
<
real
>
idsProb
;
/**
/**
* @brief logProb, current probability of path.
* @brief logProb, current probability of path.
*/
*/
...
@@ -228,11 +233,13 @@ public:
...
@@ -228,11 +233,13 @@ public:
*/
*/
Path
(
Path
&
old
,
int
newId
,
real
logProb
,
int
machineId
,
int
topIndex
)
Path
(
Path
&
old
,
int
newId
,
real
logProb
,
int
machineId
,
int
topIndex
)
:
ids
(
old
.
ids
),
:
ids
(
old
.
ids
),
idsProb
(
old
.
idsProb
),
logProb
(
old
.
logProb
+
logProb
),
logProb
(
old
.
logProb
+
logProb
),
machineId
(
machineId
),
machineId
(
machineId
),
topIndex
(
topIndex
),
topIndex
(
topIndex
),
seqId
(
old
.
seqId
)
{
seqId
(
old
.
seqId
)
{
ids
.
push_back
(
newId
);
ids
.
push_back
(
newId
);
idsProb
.
push_back
(
logProb
);
if
(
!
old
.
probHistory
.
empty
())
{
if
(
!
old
.
probHistory
.
empty
())
{
this
->
probHistory
=
old
.
probHistory
;
this
->
probHistory
=
old
.
probHistory
;
// probHistory store current prob, not sum
// probHistory store current prob, not sum
...
@@ -411,8 +418,9 @@ protected:
...
@@ -411,8 +418,9 @@ protected:
struct
Generator
{
struct
Generator
{
GeneratorConfig
config
;
GeneratorConfig
config
;
std
::
vector
<
int
>
ids
;
// store generated sequences
std
::
vector
<
int
>
ids
;
// store generated sequences
Argument
outArg
;
// final output argument
std
::
vector
<
real
>
idsProb
;
// log probability of each generated word
Argument
outArg
;
// final output argument
};
};
bool
generating_
;
bool
generating_
;
Generator
generator_
;
Generator
generator_
;
...
...
paddle/gserver/layers/KmaxSeqScoreLayer.cpp
浏览文件 @
927adb4a
...
@@ -39,12 +39,13 @@ REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer);
...
@@ -39,12 +39,13 @@ REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer);
bool
KmaxSeqScoreLayer
::
init
(
const
LayerMap
&
layerMap
,
bool
KmaxSeqScoreLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
bool
ret
=
Layer
::
init
(
layerMap
,
parameterMap
);
bool
ret
=
Layer
::
init
(
layerMap
,
parameterMap
);
CHECK_EQ
(
1U
L
,
inputLayers_
.
size
());
CHECK_EQ
(
1U
,
inputLayers_
.
size
());
beamSize_
=
config_
.
beam_size
();
beamSize_
=
config_
.
beam_size
();
CHECK_GE
(
beamSize_
,
1
L
U
);
CHECK_GE
(
beamSize_
,
1U
);
setNeedSequenceInfo
(
false
);
setNeedSequenceInfo
(
false
);
setNeedGradient
(
false
);
return
ret
;
return
ret
;
}
}
...
@@ -96,18 +97,19 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
...
@@ -96,18 +97,19 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
scores_
=
inputScore
;
scores_
=
inputScore
;
}
}
MatrixPtr
outputValue
=
getOutputValue
();
Matrix
::
resizeOrCreate
(
Matrix
::
resizeOrCreate
(
output
V
alue
,
output
_
.
v
alue
,
input
.
hasSubseq
()
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
(),
input
.
hasSubseq
()
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
(),
beamSize_
);
beamSize_
,
outputValue
->
one
();
false
,
outputValue
->
mulScalar
(
-
1.
);
false
);
output_
.
value
->
one
();
output_
.
value
->
mulScalar
(
-
1.
);
kmaxScorePerSeq
(
scores_
->
getData
(),
kmaxScorePerSeq
(
scores_
->
getData
(),
output_
.
value
->
getData
(),
output_
.
value
->
getData
(),
input
.
hasSeq
()
?
input
.
subSequenceStartPositions
input
.
hasS
ubs
eq
()
?
input
.
subSequenceStartPositions
:
input
.
sequenceStartPositions
);
:
input
.
sequenceStartPositions
);
}
}
void
KmaxSeqScoreLayer
::
backward
(
const
UpdateCallback
&
callback
)
{}
void
KmaxSeqScoreLayer
::
backward
(
const
UpdateCallback
&
callback
)
{}
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
927adb4a
...
@@ -400,7 +400,6 @@ void initDataLayer(TestConfig testConf,
...
@@ -400,7 +400,6 @@ void initDataLayer(TestConfig testConf,
const
std
::
vector
<
int
>&
labelSeqStartPositions
=
const
std
::
vector
<
int
>&
labelSeqStartPositions
=
testConf
.
inputDefs
[
i
].
labelSeqStartPositions
;
testConf
.
inputDefs
[
i
].
labelSeqStartPositions
;
if
(
labelSeqStartPositions
.
size
()
!=
0
)
{
if
(
labelSeqStartPositions
.
size
()
!=
0
)
{
CHECK
(
!
sequenceStartPositions
);
CHECK_GE
(
static_cast
<
int
>
(
labelSeqStartPositions
.
size
()),
2
);
CHECK_GE
(
static_cast
<
int
>
(
labelSeqStartPositions
.
size
()),
2
);
sequenceStartPositions
=
sequenceStartPositions
=
...
@@ -410,6 +409,19 @@ void initDataLayer(TestConfig testConf,
...
@@ -410,6 +409,19 @@ void initDataLayer(TestConfig testConf,
useGpu
);
useGpu
);
data
.
sequenceStartPositions
=
sequenceStartPositions
;
data
.
sequenceStartPositions
=
sequenceStartPositions
;
}
}
const
std
::
vector
<
int
>&
labelSubSeqStartPositions
=
testConf
.
inputDefs
[
i
].
labelSubSeqStartPositions
;
if
(
labelSubSeqStartPositions
.
size
()
!=
0
)
{
CHECK_GE
(
static_cast
<
int
>
(
labelSubSeqStartPositions
.
size
()),
2
);
subSequenceStartPositions
=
ICpuGpuVector
::
create
(
labelSubSeqStartPositions
.
size
(),
useGpu
);
subSequenceStartPositions
->
copyFrom
(
labelSubSeqStartPositions
.
data
(),
labelSubSeqStartPositions
.
size
(),
useGpu
);
data
.
subSequenceStartPositions
=
subSequenceStartPositions
;
}
break
;
break
;
}
}
default:
default:
...
...
paddle/gserver/tests/LayerGradUtil.h
浏览文件 @
927adb4a
...
@@ -67,6 +67,7 @@ struct InputDef {
...
@@ -67,6 +67,7 @@ struct InputDef {
bool
isStatic
;
bool
isStatic
;
std
::
vector
<
int
>
labelInitValue
;
std
::
vector
<
int
>
labelInitValue
;
std
::
vector
<
int
>
labelSeqStartPositions
;
std
::
vector
<
int
>
labelSeqStartPositions
;
std
::
vector
<
int
>
labelSubSeqStartPositions
;
MatrixPtr
selfDefinedData
;
MatrixPtr
selfDefinedData
;
InputDef
(
InputType
type
,
string
nameIn
,
size_t
dimIn
,
size_t
sizeIn
)
{
InputDef
(
InputType
type
,
string
nameIn
,
size_t
dimIn
,
size_t
sizeIn
)
{
...
@@ -81,8 +82,10 @@ struct InputDef {
...
@@ -81,8 +82,10 @@ struct InputDef {
InputDef
(
InputType
type
,
InputDef
(
InputType
type
,
string
nameIn
,
string
nameIn
,
MatrixPtr
selfDefinedData
,
MatrixPtr
selfDefinedData
,
std
::
vector
<
int
>
selfDefinedSeqStartPos
=
{})
std
::
vector
<
int
>
selfDefinedSeqStartPos
=
{},
std
::
vector
<
int
>
selfDefinedSubSeqStartPos
=
{})
:
labelSeqStartPositions
(
selfDefinedSeqStartPos
),
:
labelSeqStartPositions
(
selfDefinedSeqStartPos
),
labelSubSeqStartPositions
(
selfDefinedSubSeqStartPos
),
selfDefinedData
(
selfDefinedData
)
{
selfDefinedData
(
selfDefinedData
)
{
inputType
=
type
;
inputType
=
type
;
name
=
nameIn
;
name
=
nameIn
;
...
...
paddle/gserver/tests/test_KmaxSeqScore.cpp
浏览文件 @
927adb4a
...
@@ -32,7 +32,6 @@ DECLARE_int32(gpu_id);
...
@@ -32,7 +32,6 @@ DECLARE_int32(gpu_id);
DECLARE_bool
(
thread_local_rand_use_global_seed
);
DECLARE_bool
(
thread_local_rand_use_global_seed
);
vector
<
int
>
randSampling
(
int
range
,
int
n
)
{
vector
<
int
>
randSampling
(
int
range
,
int
n
)
{
srand
(
1
);
CHECK_GE
(
range
,
n
);
CHECK_GE
(
range
,
n
);
vector
<
int
>
num
(
range
);
vector
<
int
>
num
(
range
);
iota
(
begin
(
num
),
end
(
num
),
0
);
iota
(
begin
(
num
),
end
(
num
),
0
);
...
@@ -45,7 +44,7 @@ vector<int> randSampling(int range, int n) {
...
@@ -45,7 +44,7 @@ vector<int> randSampling(int range, int n) {
void
genRandomSeqInfo
(
vector
<
int
>&
seqStartPosition
,
void
genRandomSeqInfo
(
vector
<
int
>&
seqStartPosition
,
vector
<
int
>&
subSeqStartPosition
)
{
vector
<
int
>&
subSeqStartPosition
)
{
const
int
maxSeqNum
=
5
;
const
int
maxSeqNum
=
100
;
// generate random start position information
// generate random start position information
int
seqNum
=
1
+
(
rand
()
%
maxSeqNum
);
int
seqNum
=
1
+
(
rand
()
%
maxSeqNum
);
seqStartPosition
.
resize
(
seqNum
+
1
,
0
);
seqStartPosition
.
resize
(
seqNum
+
1
,
0
);
...
@@ -61,78 +60,93 @@ void genRandomSeqInfo(vector<int>& seqStartPosition,
...
@@ -61,78 +60,93 @@ void genRandomSeqInfo(vector<int>& seqStartPosition,
void
genRandomGroundTruth
(
real
*
values
,
void
genRandomGroundTruth
(
real
*
values
,
vector
<
vector
<
int
>>&
groundTruth
,
vector
<
vector
<
int
>>&
groundTruth
,
vector
<
int
>&
seqStartPosition
,
vector
<
int
>&
startPos
,
vector
<
int
>&
subSeqStartPosition
,
bool
useSubseqInfo
,
size_t
beamSize
)
{
size_t
beamSize
)
{
auto
genData
=
[
&
](
real
*
values
,
vector
<
int
>&
startPos
,
size_t
beamSize
)
{
groundTruth
.
resize
(
startPos
.
size
()
-
1
,
vector
<
int
>
(
beamSize
,
-
1
));
groundTruth
.
resize
(
startPos
.
size
()
-
1
,
vector
<
int
>
(
beamSize
,
-
1
));
for
(
size_t
i
=
0
;
i
<
startPos
.
size
()
-
1
;
++
i
)
{
int
seqLen
=
startPos
[
i
+
1
]
-
startPos
[
i
];
for
(
size_t
i
=
0
;
i
<
startPos
.
size
()
-
1
;
++
i
)
{
vector
<
int
>
pos
=
int
seqLen
=
startPos
[
i
+
1
]
-
startPos
[
i
];
randSampling
(
seqLen
,
min
(
static_cast
<
int
>
(
beamSize
),
seqLen
));
vector
<
int
>
pos
=
for
(
size_t
j
=
0
;
j
<
pos
.
size
();
++
j
)
{
randSampling
(
seqLen
,
min
(
static_cast
<
int
>
(
beamSize
),
seqLen
));
groundTruth
[
i
][
j
]
=
pos
[
j
];
for
(
size_t
j
=
0
;
j
<
pos
.
size
();
++
j
)
{
values
[
startPos
[
i
]
+
pos
[
j
]]
=
1.
;
groundTruth
[
i
][
j
]
=
pos
[
j
];
values
[
subSeqStartPosition
[
i
]
+
pos
[
j
]]
=
1.
;
}
}
}
};
}
}
if
(
useSubseqInfo
)
void
checkLayerOut
(
vector
<
vector
<
int
>>
groundTruth
,
genData
(
values
,
subSeqStartPosition
,
beamSize
);
real
*
layerOut
,
else
size_t
beamSize
)
{
genData
(
values
,
seqStartPosition
,
beamSize
);
for
(
size_t
i
=
0
;
i
<
groundTruth
.
size
();
++
i
)
{
int
begPos
=
i
*
beamSize
;
vector
<
real
>
tmp
(
layerOut
+
begPos
,
layerOut
+
begPos
+
beamSize
);
sort
(
begin
(
tmp
),
end
(
tmp
));
sort
(
begin
(
groundTruth
[
i
]),
end
(
groundTruth
[
i
]));
for
(
size_t
j
=
0
;
j
<
beamSize
;
++
j
)
CHECK_EQ
(
tmp
[
j
],
groundTruth
[
i
][
j
]);
}
}
}
// Test that the batchNormLayer can be followed by a ConvLayer
TEST
(
Layer
,
kmaxSeqScoreLayer
)
{
TEST
(
Layer
,
kmaxSeqScoreLayer
)
{
const
size_t
beamSize
=
5
;
const
size_t
maxBeamSize
=
100
;
int
beamSize
=
1
+
(
rand
()
%
maxBeamSize
);
vector
<
int
>
seqStartPosition
;
vector
<
int
>
seqStartPosition
;
vector
<
int
>
subSeqStartPosition
;
vector
<
int
>
subSeqStartPosition
;
genRandomSeqInfo
(
seqStartPosition
,
subSeqStartPosition
);
genRandomSeqInfo
(
seqStartPosition
,
subSeqStartPosition
);
MatrixPtr
inValue
=
MatrixPtr
inValue
=
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
Matrix
::
create
(
subSeqStartPosition
.
back
(),
1
,
false
,
false
);
inValue
->
randomizeUniform
();
for
(
auto
hasSubseq
:
{
false
,
true
})
{
for
(
auto
hasSubseq
:
{
false
,
true
})
{
vector
<
vector
<
int
>>
groundTruth
;
vector
<
vector
<
int
>>
groundTruth
;
inValue
->
randomizeUniform
();
genRandomGroundTruth
(
inValue
->
getData
(),
genRandomGroundTruth
(
inValue
->
getData
(),
groundTruth
,
groundTruth
,
seqStartPosition
,
hasSubseq
?
subSeqStartPosition
:
seqStartPosition
,
subSeqStartPosition
,
hasSubseq
,
beamSize
);
beamSize
);
for
(
auto
useGpu
:
{
false
,
true
})
{
for
(
auto
useGpu
:
{
false
,
true
})
{
TestConfig
config
;
TestConfig
config
;
config
.
layerConfig
.
set_type
(
"kmax_seq_score"
);
config
.
layerConfig
.
set_type
(
"kmax_seq_score"
);
config
.
layerConfig
.
set_beam_size
(
beamSize
);
config
.
layerConfig
.
set_beam_size
(
beamSize
);
config
.
inputDefs
.
push_back
(
{
hasSubseq
?
INPUT_HASSUB_SEQUENCE_DATA
:
INPUT_SEQUENCE_DATA
,
if
(
hasSubseq
)
{
"layer_0"
,
config
.
inputDefs
.
push_back
({
INPUT_SELF_DEFINE_DATA
,
1
,
"scores"
,
0
});
inValue
,
seqStartPosition
,
subSeqStartPosition
});
}
else
{
config
.
inputDefs
.
push_back
(
{
INPUT_SELF_DEFINE_DATA
,
"scores"
,
inValue
,
seqStartPosition
});
}
config
.
layerConfig
.
add_inputs
();
config
.
layerConfig
.
add_inputs
();
// data layer initialize
// data layer initialize
std
::
vector
<
DataLayerPtr
>
dataLayers
;
std
::
vector
<
DataLayerPtr
>
dataLayers
;
LayerMap
layerMap
;
LayerMap
layerMap
;
vector
<
Argument
>
datas
;
vector
<
Argument
>
datas
;
initDataLayer
(
config
,
initDataLayer
(
&
dataLayers
,
config
,
&
datas
,
&
dataLayers
,
&
layerMap
,
&
datas
,
"kmax_seq_score"
,
&
layerMap
,
100
,
"kmax_seq_score"
,
false
,
100
/* actually this parameter is unused in self-defined input*/
,
useGpu
);
false
,
useGpu
);
// test layer initialize
// test layer initialize
std
::
vector
<
ParameterPtr
>
parameters
;
std
::
vector
<
ParameterPtr
>
parameters
;
LayerPtr
kmaxSeqScoreLayer
;
LayerPtr
kmaxSeqScoreLayer
;
FLAGS_use_gpu
=
useGpu
;
initTestLayer
(
config
,
&
layerMap
,
&
parameters
,
&
kmaxSeqScoreLayer
);
initTestLayer
(
config
,
&
layerMap
,
&
parameters
,
&
kmaxSeqScoreLayer
);
kmaxSeqScoreLayer
->
forward
(
PASS_TRAIN
);
kmaxSeqScoreLayer
->
forward
(
PASS_TRAIN
);
const
MatrixPtr
outValue
=
kmaxSeqScoreLayer
->
getOutputValue
();
CHECK_EQ
(
outValue
->
getHeight
(),
hasSubseq
?
subSeqStartPosition
.
size
()
-
1
:
seqStartPosition
.
size
()
-
1
);
CHECK_EQ
(
outValue
->
getWidth
(),
beamSize
);
checkLayerOut
(
groundTruth
,
outValue
->
getData
(),
beamSize
);
}
}
}
}
}
}
...
@@ -141,6 +155,6 @@ int main(int argc, char** argv) {
...
@@ -141,6 +155,6 @@ int main(int argc, char** argv) {
testing
::
InitGoogleTest
(
&
argc
,
argv
);
testing
::
InitGoogleTest
(
&
argc
,
argv
);
initMain
(
argc
,
argv
);
initMain
(
argc
,
argv
);
FLAGS_thread_local_rand_use_global_seed
=
true
;
FLAGS_thread_local_rand_use_global_seed
=
true
;
srand
(
1
);
srand
(
(
size_t
)(
time
(
NULL
))
);
return
RUN_ALL_TESTS
();
return
RUN_ALL_TESTS
();
}
}
paddle/math/BaseMatrix.cu
浏览文件 @
927adb4a
...
@@ -442,7 +442,8 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER,
...
@@ -442,7 +442,8 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER,
template
<
class
T
>
template
<
class
T
>
void
BaseMatrixT
<
T
>::
clip
(
T
p1
,
T
p2
)
{
applyUnary
(
unary
::
Clip
<
T
>
(
p1
,
p2
));
}
void
BaseMatrixT
<
T
>::
clip
(
T
p1
,
T
p2
)
{
applyUnary
(
unary
::
Clip
<
T
>
(
p1
,
p2
));
}
DEFINE_MATRIX_BINARY_PARAMETER_OP
(
ClipDerivative
,
TWO_PARAMETER
,
a
=
b
<
p1
?
0
:
(
b
>
p2
?
0
:
1
));
DEFINE_MATRIX_BINARY_PARAMETER_OP
(
ClipDerivative
,
TWO_PARAMETER
,
a
=
b
<
p1
?
0
:
(
b
>
p2
?
0
:
1
));
template
<
class
T
>
template
<
class
T
>
void
BaseMatrixT
<
T
>::
clipDerivative
(
BaseMatrixT
&
b
,
T
p1
,
T
p2
)
{
void
BaseMatrixT
<
T
>::
clipDerivative
(
BaseMatrixT
&
b
,
T
p1
,
T
p2
)
{
applyBinary
(
binary
::
ClipDerivative
<
T
>
(
p1
,
p2
),
b
);
applyBinary
(
binary
::
ClipDerivative
<
T
>
(
p1
,
p2
),
b
);
...
...
paddle/memory/detail/buddy_allocator.h
浏览文件 @
927adb4a
...
@@ -39,7 +39,7 @@ class BuddyAllocator {
...
@@ -39,7 +39,7 @@ class BuddyAllocator {
public:
public:
void
*
Alloc
(
size_t
unaligned_size
);
void
*
Alloc
(
size_t
unaligned_size
);
void
Free
(
void
*
);
void
Free
(
void
*
ptr
);
size_t
Used
();
size_t
Used
();
public:
public:
...
...
paddle/memory/detail/meta_cache.h
浏览文件 @
927adb4a
...
@@ -33,17 +33,17 @@ namespace detail {
...
@@ -33,17 +33,17 @@ namespace detail {
*/
*/
class
MetadataCache
{
class
MetadataCache
{
public:
public:
MetadataCache
(
bool
uses_gpu
);
explicit
MetadataCache
(
bool
uses_gpu
);
public:
public:
/*! \brief Load the associated metadata for the specified memory block. */
/*! \brief Load the associated metadata for the specified memory block. */
Metadata
load
(
const
MemoryBlock
*
);
Metadata
load
(
const
MemoryBlock
*
memory_block
);
/*! \brief Store the associated metadata for the specified memory block. */
/*! \brief Store the associated metadata for the specified memory block. */
void
store
(
MemoryBlock
*
,
const
Metadata
&
);
void
store
(
MemoryBlock
*
memory_block
,
const
Metadata
&
meta_data
);
/*! \brief Indicate that the specified metadata will no longer be used. */
/*! \brief Indicate that the specified metadata will no longer be used. */
void
invalidate
(
MemoryBlock
*
);
void
invalidate
(
MemoryBlock
*
memory_block
);
public:
public:
MetadataCache
(
const
MetadataCache
&
)
=
delete
;
MetadataCache
(
const
MetadataCache
&
)
=
delete
;
...
...
paddle/memory/memory.h
浏览文件 @
927adb4a
...
@@ -68,7 +68,7 @@ class PODDeleter {
...
@@ -68,7 +68,7 @@ class PODDeleter {
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
public:
public:
PODDeleter
(
Place
place
)
:
place_
(
place
)
{}
explicit
PODDeleter
(
Place
place
)
:
place_
(
place
)
{}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
static_cast
<
void
*>
(
ptr
));
}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
static_cast
<
void
*>
(
ptr
));
}
private:
private:
...
...
paddle/operators/CMakeLists.txt
浏览文件 @
927adb4a
...
@@ -41,6 +41,9 @@ function(op_library TARGET)
...
@@ -41,6 +41,9 @@ function(op_library TARGET)
endif
()
endif
()
endfunction
()
endfunction
()
cc_library
(
net_op SRCS net_op.cc DEPS op_registry
)
cc_test
(
net_op_test SRCS net_op_test.cc DEPS net_op
)
op_library
(
add_op SRCS add_op.cc add_op.cu
)
op_library
(
add_op SRCS add_op.cc add_op.cu
)
cc_test
(
add_op_test SRCS add_op_test.cc DEPS add_op
)
cc_test
(
add_op_test SRCS add_op_test.cc DEPS add_op
)
...
@@ -59,11 +62,6 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
...
@@ -59,11 +62,6 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
op_library
(
fc_op
op_library
(
fc_op
SRCS fc_op.cc
SRCS fc_op.cc
DEPS mul_op rowwise_add_op sigmoid_op softmax_op net
)
DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op
)
op_library
(
recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net_op
)
op_library
(
recurrent_network_op
cc_test
(
recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op
)
SRCS recurrent_network_op.cc
DEPS op_desc tensor net
)
cc_test
(
recurrent_network_op_test
SRCS recurrent_network_op_test.cc
DEPS recurrent_network_op mul_op add_op
)
paddle/operators/add_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h"
#include "paddle/operators/add_op.h"
...
...
paddle/operators/add_op.h
浏览文件 @
927adb4a
...
@@ -28,9 +28,13 @@ public:
...
@@ -28,9 +28,13 @@ public:
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
output
).
device
(
context
.
GetEigenDevice
<
Place
>
())
=
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input0
);
framework
::
EigenVector
<
T
>::
Flatten
(
*
input0
)
+
auto
Y
=
EigenVector
<
T
>::
Flatten
(
*
input1
);
framework
::
EigenVector
<
T
>::
Flatten
(
*
input1
);
auto
Z
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Z
.
device
(
place
)
=
X
+
Y
;
}
}
};
};
...
...
paddle/operators/cross_entropy_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy
,
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOpKernel
<
ops
::
GPUPlace
,
float
>
);
ops
::
OnehotCrossEntropyOpKernel
<
ops
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/fill_zeros_like_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h"
#include "paddle/operators/fill_zeros_like_op.h"
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
fill_zeros_like
,
fill_zeros_like
,
paddle
::
operators
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
paddle
::
operators
::
FillZerosLikeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/mean_op.cc
浏览文件 @
927adb4a
...
@@ -33,13 +33,23 @@ public:
...
@@ -33,13 +33,23 @@ public:
MeanOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
MeanOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The input of mean op"
);
AddInput
(
"X"
,
"The input of mean op"
);
AddOutput
(
"Out"
,
"The output of mean op"
);
AddOutput
(
"Out"
,
"The output of mean op"
)
.
IgnoreGradient
()
;
AddComment
(
"Mean Operator"
);
AddComment
(
"Mean Operator"
);
}
}
};
};
class
MeanGradOp
:
public
OperatorWithKernel
{
protected:
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{
ctx
.
Output
<
Tensor
>
(
"X"
+
GRAD_VAR_SUFFIX
())
->
Resize
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
dims
());
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OP
(
mean
,
ops
::
MeanOp
,
ops
::
MeanOpMaker
);
REGISTER_OP
(
mean
,
ops
::
MeanOp
,
ops
::
MeanOpMaker
);
REGISTER_OP_CPU_KERNEL
(
mean
,
ops
::
MeanKernel
<
ops
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
mean
,
ops
::
MeanKernel
<
ops
::
CPUPlace
,
float
>
);
REGISTER_GRADIENT_OP
(
mean
,
mean_grad
,
ops
::
MeanGradOp
);
REGISTER_OP_CPU_KERNEL
(
mean_grad
,
ops
::
MeanGradKernel
<
ops
::
CPUPlace
,
float
>
);
paddle/operators/mean_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/mean_op.h"
#include "paddle/operators/mean_op.h"
REGISTER_OP_GPU_KERNEL
(
mean
,
ops
::
MeanKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
mean
,
ops
::
MeanKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
mean_grad
,
ops
::
MeanGradKernel
<
ops
::
GPUPlace
,
float
>
);
paddle/operators/mean_op.h
浏览文件 @
927adb4a
...
@@ -27,8 +27,28 @@ public:
...
@@ -27,8 +27,28 @@ public:
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenScalar
<
T
>::
From
(
*
output
).
device
(
context
.
GetEigenDevice
<
Place
>
())
=
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input
);
EigenVector
<
T
>::
Flatten
(
*
input
).
mean
();
auto
y
=
EigenScalar
<
T
>::
From
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
y
.
device
(
place
)
=
X
.
mean
();
}
};
template
<
typename
Place
,
typename
T
>
class
MeanGradKernel
:
public
OpKernel
{
public:
void
Compute
(
const
ExecutionContext
&
context
)
const
override
{
auto
OG
=
context
.
Input
<
Tensor
>
(
"Out"
+
OperatorBase
::
GRAD_VAR_SUFFIX
());
PADDLE_ENFORCE
(
framework
::
product
(
OG
->
dims
())
==
1
,
"Mean Gradient should be scalar"
);
auto
IG
=
context
.
Output
<
Tensor
>
(
"X"
+
OperatorBase
::
GRAD_VAR_SUFFIX
());
IG
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
ig_size
=
(
T
)
framework
::
product
(
IG
->
dims
());
EigenVector
<
T
>::
Flatten
(
*
IG
).
device
(
context
.
GetEigenDevice
<
Place
>
())
=
EigenScalar
<
T
>::
From
(
*
OG
)
/
ig_size
;
}
}
};
};
...
...
paddle/operators/mul_op.cu
浏览文件 @
927adb4a
...
@@ -15,4 +15,4 @@
...
@@ -15,4 +15,4 @@
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h"
#include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL
(
mul
,
ops
::
MulKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
mul
,
ops
::
MulKernel
<
ops
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/mul_op.h
浏览文件 @
927adb4a
...
@@ -26,13 +26,18 @@ public:
...
@@ -26,13 +26,18 @@ public:
Eigen
::
array
<
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
,
1
>
dim_pair
=
{
Eigen
::
array
<
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
,
1
>
dim_pair
=
{
{
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
(
1
,
0
)}};
{
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
(
1
,
0
)}};
auto
input0
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
input1
=
context
.
Input
<
Tensor
>
(
"Y"
);
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenMatrix
<
T
>::
From
(
*
output
).
device
(
context
.
GetEigenDevice
<
Place
>
())
=
auto
X
=
EigenMatrix
<
T
>::
From
(
*
input0
);
EigenMatrix
<
T
>::
From
(
*
context
.
Input
<
Tensor
>
(
"X"
))
auto
Y
=
EigenMatrix
<
T
>::
From
(
*
input1
);
.
contract
(
EigenMatrix
<
T
>::
From
(
*
context
.
Input
<
Tensor
>
(
"Y"
)),
auto
Z
=
EigenMatrix
<
T
>::
From
(
*
output
);
dim_pair
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Z
.
device
(
place
)
=
X
.
contract
(
Y
,
dim_pair
);
}
}
};
};
}
// namespace operators
}
// namespace operators
...
...
paddle/
framework/net
.cc
→
paddle/
operators/net_op
.cc
浏览文件 @
927adb4a
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
limitations under the License.
limitations under the License.
*/
*/
#include "paddle/
framework/net
.h"
#include "paddle/
operators/net_op
.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
operators
{
void
NetOp
::
CompleteAddOp
(
bool
calc
)
{
void
NetOp
::
CompleteAddOp
(
bool
calc
)
{
add_op_done_
=
true
;
add_op_done_
=
true
;
...
@@ -74,5 +74,5 @@ std::string NetOp::DebugString() const {
...
@@ -74,5 +74,5 @@ std::string NetOp::DebugString() const {
bool
NetOp
::
IsNetOp
()
const
{
return
true
;
}
bool
NetOp
::
IsNetOp
()
const
{
return
true
;
}
}
// namespace
framework
}
// namespace
operators
}
// namespace paddle
}
// namespace paddle
paddle/
framework/net
.h
→
paddle/
operators/net_op
.h
浏览文件 @
927adb4a
...
@@ -14,15 +14,17 @@ limitations under the License. */
...
@@ -14,15 +14,17 @@ limitations under the License. */
#pragma once
#pragma once
#include <paddle/framework/op_desc.pb.h>
#include "paddle/framework/op_desc.pb.h"
#include <paddle/framework/operator.h>
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/scope.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/device_context.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
operators
{
/**
/**
* @brief Network is also a type of Operator
* @brief Network is also a type of Operator
*
*
...
@@ -37,13 +39,13 @@ namespace framework {
...
@@ -37,13 +39,13 @@ namespace framework {
* This is the base class of network, all the networks should implement the APIs
* This is the base class of network, all the networks should implement the APIs
* it defines.
* it defines.
*/
*/
class
NetOp
:
public
OperatorBase
{
class
NetOp
:
public
framework
::
OperatorBase
{
public:
public:
/**
/**
* Infer all the operators' input and output variables' shapes, will be called
* Infer all the operators' input and output variables' shapes, will be called
* before every mini-batch
* before every mini-batch
*/
*/
void
InferShape
(
const
Scope
&
scope
)
const
override
{
void
InferShape
(
const
framework
::
Scope
&
scope
)
const
override
{
for
(
auto
&
op
:
ops_
)
{
for
(
auto
&
op
:
ops_
)
{
op
->
InferShape
(
scope
);
op
->
InferShape
(
scope
);
}
}
...
@@ -56,7 +58,7 @@ class NetOp : public OperatorBase {
...
@@ -56,7 +58,7 @@ class NetOp : public OperatorBase {
* scope will be used instead. If no OpContext is provicded, default context
* scope will be used instead. If no OpContext is provicded, default context
* will be used.
* will be used.
*/
*/
void
Run
(
const
Scope
&
scope
,
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
for
(
auto
&
op
:
ops_
)
{
for
(
auto
&
op
:
ops_
)
{
op
->
Run
(
scope
,
dev_ctx
);
op
->
Run
(
scope
,
dev_ctx
);
...
@@ -88,7 +90,7 @@ class NetOp : public OperatorBase {
...
@@ -88,7 +90,7 @@ class NetOp : public OperatorBase {
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>
ops_
;
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>
ops_
;
private:
private:
bool
add_op_done_
{
false
};
bool
add_op_done_
{
false
};
template
<
typename
T
,
typename
KeyType
>
template
<
typename
T
,
typename
KeyType
>
...
@@ -97,5 +99,5 @@ class NetOp : public OperatorBase {
...
@@ -97,5 +99,5 @@ class NetOp : public OperatorBase {
}
}
};
};
}
// namespace
framework
}
// namespace
operators
}
// namespace paddle
}
// namespace paddle
paddle/
framework/net
_design.md
→
paddle/
operators/net_op
_design.md
浏览文件 @
927adb4a
文件已移动
paddle/
framework
/net_op_test.cc
→
paddle/
operators
/net_op_test.cc
浏览文件 @
927adb4a
#include "paddle/operators/net_op.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <paddle/framework/net.h>
#include
<paddle/framework/op_registry.h>
#include
"paddle/framework/op_registry.h"
#include
<paddle/framework/operator.h>
#include
"paddle/framework/operator.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
operators
{
static
int
infer_shape_cnt
=
0
;
static
int
infer_shape_cnt
=
0
;
static
int
run_cnt
=
0
;
static
int
run_cnt
=
0
;
class
TestOp
:
public
OperatorBase
{
class
TestOp
:
public
OperatorBase
{
public:
public:
void
InferShape
(
const
framework
::
Scope
&
scope
)
const
override
{
void
InferShape
(
const
framework
::
Scope
&
scope
)
const
override
{
++
infer_shape_cnt
;
++
infer_shape_cnt
;
}
}
...
@@ -21,7 +23,7 @@ class TestOp : public OperatorBase {
...
@@ -21,7 +23,7 @@ class TestOp : public OperatorBase {
};
};
class
EmptyOp
:
public
OperatorBase
{
class
EmptyOp
:
public
OperatorBase
{
public:
public:
void
InferShape
(
const
Scope
&
scope
)
const
override
{}
void
InferShape
(
const
Scope
&
scope
)
const
override
{}
void
Run
(
const
Scope
&
scope
,
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{}
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{}
...
@@ -73,7 +75,7 @@ TEST(OpKernel, all) {
...
@@ -73,7 +75,7 @@ TEST(OpKernel, all) {
ASSERT_THROW
(
net
->
AddOp
(
op2
),
paddle
::
platform
::
EnforceNotMet
);
ASSERT_THROW
(
net
->
AddOp
(
op2
),
paddle
::
platform
::
EnforceNotMet
);
}
}
TEST
(
Net
,
insert_op
)
{
TEST
(
Net
Op
,
insert_op
)
{
NetOp
net
;
NetOp
net
;
auto
op1
=
std
::
make_shared
<
EmptyOp
>
();
auto
op1
=
std
::
make_shared
<
EmptyOp
>
();
op1
->
inputs_
=
{
"x"
,
"w1"
,
"b1"
};
op1
->
inputs_
=
{
"x"
,
"w1"
,
"b1"
};
...
@@ -85,5 +87,5 @@ TEST(Net, insert_op) {
...
@@ -85,5 +87,5 @@ TEST(Net, insert_op) {
ASSERT_EQ
(
3UL
,
net
.
ops_
.
size
());
ASSERT_EQ
(
3UL
,
net
.
ops_
.
size
());
}
}
}
// namespace
framework
}
// namespace
operators
}
// namespace paddle
}
// namespace paddle
paddle/operators/recurrent_
network_
op.cc
→
paddle/operators/recurrent_op.cc
浏览文件 @
927adb4a
...
@@ -12,14 +12,14 @@
...
@@ -12,14 +12,14 @@
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/operators/recurrent_
network_
op.h"
#include "paddle/operators/recurrent_op.h"
#include <glog/logging.h>
#include <glog/logging.h>
#include <cstring>
#include <cstring>
#include <sstream>
#include <sstream>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -29,11 +29,15 @@ namespace rnn {
...
@@ -29,11 +29,15 @@ namespace rnn {
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
Link
>&
inlinks
,
const
std
::
vector
<
Link
>&
inlinks
,
const
size_t
seq_len
)
{
const
size_t
seq_len
,
bool
infer_shape_mode
)
{
PADDLE_ENFORCE
(
!
inlinks
.
empty
(),
"no in links are provided."
);
PADDLE_ENFORCE
(
!
inlinks
.
empty
(),
"no in links are provided."
);
for
(
size_t
i
=
0
;
i
<
inlinks
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
inlinks
.
size
();
++
i
)
{
Tensor
*
input
=
auto
input_var
=
step_scopes
[
0
]
->
FindVar
(
inlinks
[
i
].
external
);
step_scopes
[
0
]
->
FindVar
(
inlinks
[
i
].
external
)
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
input_var
!=
nullptr
,
"input link [%s] is not in scope."
,
inlinks
[
i
].
external
);
Tensor
*
input
=
input_var
->
GetMutable
<
Tensor
>
();
DDim
dims
=
input
->
dims
();
DDim
dims
=
input
->
dims
();
PADDLE_ENFORCE
(
static_cast
<
size_t
>
(
dims
[
0
])
==
seq_len
,
PADDLE_ENFORCE
(
static_cast
<
size_t
>
(
dims
[
0
])
==
seq_len
,
"all the inlinks must have same length"
);
"all the inlinks must have same length"
);
...
@@ -41,7 +45,9 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
...
@@ -41,7 +45,9 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
Tensor
*
step_input
=
Tensor
*
step_input
=
step_scopes
[
j
]
->
NewVar
(
inlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
();
step_scopes
[
j
]
->
NewVar
(
inlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
();
*
step_input
=
input
->
Slice
<
float
>
(
j
,
j
+
1
);
if
(
!
infer_shape_mode
)
{
*
step_input
=
input
->
Slice
<
float
>
(
j
,
j
+
1
);
}
step_input
->
Resize
(
step_dims
);
step_input
->
Resize
(
step_dims
);
}
}
}
}
...
@@ -49,36 +55,41 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
...
@@ -49,36 +55,41 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
Link
>&
outlinks
,
const
std
::
vector
<
Link
>&
outlinks
,
const
size_t
seq_len
)
{
const
size_t
seq_len
,
bool
infer_shape_mode
)
{
for
(
size_t
i
=
0
;
i
<
outlinks
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
outlinks
.
size
();
i
++
)
{
Tensor
*
output
=
auto
output_var
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
external
);
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
external
)
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
output_var
!=
nullptr
,
"output link [%s] is not in scope."
,
// TODO(qingiqng) remove following code after adding
outlinks
[
i
].
external
);
// InferShape in RecurrentGradientOp
Tensor
*
output
=
output_var
->
GetMutable
<
Tensor
>
();
DDim
step_dims
=
step_scopes
[
0
]
if
(
infer_shape_mode
)
{
->
FindVar
(
outlinks
[
i
].
internal
)
DDim
step_dims
=
step_scopes
[
0
]
->
GetMutable
<
Tensor
>
()
->
FindVar
(
outlinks
[
i
].
internal
)
->
dims
();
->
GetMutable
<
Tensor
>
()
std
::
vector
<
int
>
dims_vec
=
vectorize
(
step_dims
);
->
dims
();
dims_vec
.
insert
(
dims_vec
.
begin
(),
seq_len
);
std
::
vector
<
int
>
dims_vec
=
vectorize
(
step_dims
);
output
->
mutable_data
<
float
>
(
make_ddim
(
dims_vec
),
platform
::
CPUPlace
());
dims_vec
.
insert
(
dims_vec
.
begin
(),
seq_len
);
output
->
Resize
(
make_ddim
(
dims_vec
));
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
}
else
{
Tensor
*
step_output
=
output
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
step_scopes
[
j
]
->
FindVar
(
outlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
();
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
// TODO(luotao02) data type and platform::DeviceContext() should set
Tensor
*
step_output
=
// correctly
step_scopes
[
j
]
->
FindVar
(
outlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
();
(
output
->
Slice
<
float
>
(
j
,
j
+
1
))
// TODO(luotao02) data type and platform::DeviceContext() should set
.
CopyFrom
<
float
>
(
*
step_output
,
platform
::
CPUPlace
());
// correctly
(
output
->
Slice
<
float
>
(
j
,
j
+
1
))
.
CopyFrom
<
float
>
(
*
step_output
,
platform
::
CPUPlace
());
}
}
}
}
}
}
}
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
scopes
,
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
scopes
,
const
std
::
vector
<
rnn
::
MemoryAttr
>&
memories
,
const
std
::
vector
<
rnn
::
MemoryAttr
>&
memories
,
size_t
step_id
,
const
size_t
step_id
,
int
offset
)
{
const
int
offset
,
bool
infer_shape_mode
)
{
PADDLE_ENFORCE
(
step_id
<
scopes
.
size
(),
PADDLE_ENFORCE
(
step_id
<
scopes
.
size
(),
"step [%d] is out of range of step scopes' size [%d]"
,
"step [%d] is out of range of step scopes' size [%d]"
,
step_id
,
step_id
,
...
@@ -95,18 +106,13 @@ void LinkMemories(const std::vector<Scope*>& scopes,
...
@@ -95,18 +106,13 @@ void LinkMemories(const std::vector<Scope*>& scopes,
auto
scope
=
scopes
[
step_id
];
auto
scope
=
scopes
[
step_id
];
auto
linked_scope
=
scopes
[
step_id
+
offset
];
auto
linked_scope
=
scopes
[
step_id
+
offset
];
for
(
auto
&
attr
:
memories
)
{
for
(
auto
&
attr
:
memories
)
{
auto
mem
=
scope
->
NewVar
(
attr
.
pre_var
)
->
GetMutable
<
Tensor
>
();
auto
mem
=
scope
->
FindVar
(
attr
.
pre_var
)
->
GetMutable
<
Tensor
>
();
// maybe share variable is better?
auto
linked_mem
=
linked_scope
->
FindVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
auto
linked_mem
=
linked_scope
->
FindVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
mem
->
ShareDataWith
<
float
>
(
*
linked_mem
);
if
(
infer_shape_mode
)
{
mem
->
Resize
(
linked_mem
->
dims
());
// TODO(qingqing) remove following code
}
else
{
// the memory of current step should be allocated in step net
mem
->
ShareDataWith
<
float
>
(
*
linked_mem
);
auto
m
=
scope
->
NewVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
}
// for unit test, as addOp and mulOp are null currently, if not
// mutable_data, mem.data() in output will be error. We will
// remove this line after merge the correct addOp and mulOp.
m
->
mutable_data
<
float
>
(
mem
->
dims
(),
platform
::
CPUPlace
());
}
}
}
}
...
@@ -175,60 +181,39 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const {
...
@@ -175,60 +181,39 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const {
->
dims
()[
0
];
->
dims
()[
0
];
CreateScopes
(
scope
);
CreateScopes
(
scope
);
auto
step_scopes
=
GetStepScopes
(
scope
);
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
// SegmentInputs is called in InferShape. The input must hold memory in
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
true
/*infer_shape_mode*/
);
// SegmentInputs. But the other op only set dimension for the output in
InitMemories
(
step_scopes
[
0
],
true
/*infer_shape_mode*/
);
// InferShape. That's a problem. Wether the RNN op needs InferShape or not?
// Wether the following functions (SegmentInputs, InitMemories, ...) need
// to rewrite for RNN op?
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
);
InitMemories
(
step_scopes
[
0
]);
PADDLE_ENFORCE
(
scope
.
FindVar
(
arg_
->
step_net
)
!=
nullptr
,
"stepnet [%s] is not in scope."
,
arg_
->
step_net
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
// If the InferShape is called in OperatorBase's run function,
// the rnn op only needs to do InferShape for the first time step
for
(
size_t
i
=
0
;
i
<
seq_len_
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
seq_len_
;
i
++
)
{
if
(
i
>
0
)
{
if
(
i
>
0
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
i
,
-
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
i
,
-
1
,
true
/*infer_shape_mode*/
);
}
}
net
->
GetMutable
<
NetOp
>
()
->
InferShape
(
*
step_scopes
[
i
]);
net
->
GetMutable
<
NetOp
>
()
->
InferShape
(
*
step_scopes
[
i
]);
}
}
rnn
::
ConcatOutputs
(
auto
outlinks
=
arg_
->
outlinks
;
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
true
/*infer_shape_mode*/
);
for
(
size_t
i
=
0
;
i
<
outlinks
.
size
();
i
++
)
{
DDim
step_dims
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
()
->
dims
();
std
::
vector
<
int
>
dims_vec
=
vectorize
(
step_dims
);
// now only support fixed length
dims_vec
.
insert
(
dims_vec
.
begin
(),
seq_len_
);
Tensor
*
output
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
external
)
->
GetMutable
<
Tensor
>
();
output
->
Resize
(
make_ddim
(
dims_vec
));
}
}
}
void
RecurrentAlgorithm
::
Run
(
const
Scope
&
scope
,
void
RecurrentAlgorithm
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
auto
step_scopes
=
GetStepScopes
(
scope
);
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
InitMemories
(
step_scopes
[
0
],
false
/*infer_shape_mode*/
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
for
(
size_t
step_id
=
0
;
step_id
<
seq_len_
;
step_id
++
)
{
for
(
size_t
step_id
=
0
;
step_id
<
seq_len_
;
step_id
++
)
{
// the link memory is done in InferShape
// maybe remove following code after testing
if
(
step_id
>
0
)
{
if
(
step_id
>
0
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
-
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
-
1
,
false
/*infer_shape_mode*/
);
}
}
net
->
GetMutable
<
NetOp
>
()
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
net
->
GetMutable
<
NetOp
>
()
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
}
rnn
::
ConcatOutputs
(
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len_
);
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
}
}
void
RecurrentAlgorithm
::
CreateScopes
(
const
Scope
&
scope
)
const
{
void
RecurrentAlgorithm
::
CreateScopes
(
const
Scope
&
scope
)
const
{
...
@@ -244,18 +229,19 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
...
@@ -244,18 +229,19 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
// Now all variables in scope must be created outside of op.
// Now all variables in scope must be created outside of op.
auto
net_op
=
scope
.
FindVar
(
arg_
->
step_net
)
->
GetMutable
<
NetOp
>
();
auto
net_op
=
scope
.
FindVar
(
arg_
->
step_net
)
->
GetMutable
<
NetOp
>
();
for
(
auto
&
input
:
net_op
->
inputs_
)
{
for
(
auto
&
input
:
net_op
->
inputs_
)
{
// the weight are located in parent scope
if
(
!
step_scope
.
FindVar
(
input
))
step_scope
.
NewVar
(
input
);
if
(
!
step_scope
.
FindVar
(
input
))
step_scope
.
NewVar
(
input
);
}
}
for
(
auto
&
output
:
net_op
->
outputs_
)
{
for
(
auto
&
output
:
net_op
->
outputs_
)
{
step_scope
.
NewVar
(
output
);
step_scope
.
NewVar
(
output
);
}
}
step_scopes
->
emplace_back
(
&
step_scope
);
step_scopes
->
emplace_back
(
&
step_scope
);
}
}
}
}
}
}
void
RecurrentAlgorithm
::
InitMemories
(
Scope
*
step_scope
)
const
{
void
RecurrentAlgorithm
::
InitMemories
(
Scope
*
step_scope
,
bool
infer_shape_mode
)
const
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
Tensor
*
pre_mem
=
step_scope
->
NewVar
(
attr
.
pre_var
)
->
GetMutable
<
Tensor
>
();
Tensor
*
pre_mem
=
step_scope
->
NewVar
(
attr
.
pre_var
)
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
...
@@ -263,13 +249,11 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
...
@@ -263,13 +249,11 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
attr
.
var
,
attr
.
var
,
attr
.
boot_var
);
attr
.
boot_var
);
Tensor
*
boot_mem
=
step_scope
->
FindVar
(
attr
.
boot_var
)
->
GetMutable
<
Tensor
>
();
Tensor
*
boot_mem
=
step_scope
->
FindVar
(
attr
.
boot_var
)
->
GetMutable
<
Tensor
>
();
pre_mem
->
ShareDataWith
<
float
>
(
*
boot_mem
);
if
(
infer_shape_mode
)
{
pre_mem
->
Resize
(
boot_mem
->
dims
());
// TODO(qingqing) remove following code
}
else
{
// the memory of current step should be allocated in step net
pre_mem
->
ShareDataWith
<
float
>
(
*
boot_mem
);
// here for unit test
}
auto
cur_step_mem
=
step_scope
->
NewVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
cur_step_mem
->
mutable_data
<
float
>
(
boot_mem
->
dims
(),
platform
::
CPUPlace
());
}
}
}
}
...
@@ -307,13 +291,14 @@ public:
...
@@ -307,13 +291,14 @@ public:
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
const
auto
&
name
=
RecurrentOp
::
kArgName
;
const
auto
&
name
=
RecurrentOp
::
kArgName
;
// inputs and outputs stored in proto
// inputs and outputs stored in proto
AddInput
(
name
.
inlinks
,
"the input that need to be segmented for each step."
)
AddInput
(
name
.
inlinks
,
"the inputs that need to be segmented for each step."
)
.
SetMultiple
();
.
SetMultiple
();
AddInput
(
name
.
boot_memories
,
"variables to initialize memories."
)
AddInput
(
name
.
boot_memories
,
"variables to initialize memories."
)
.
SetMultiple
();
.
SetMultiple
();
AddInput
(
name
.
step_net
,
"network shared by all steps."
);
AddInput
(
name
.
step_net
,
"network shared by all steps."
);
AddOutput
(
name
.
outlinks
,
"the output that need to concated for all steps."
)
AddOutput
(
name
.
outlinks
,
"the output
s
that need to concated for all steps."
)
.
SetMultiple
();
.
SetMultiple
();
AddOutput
(
name
.
step_scopes
,
"step scopes"
);
AddOutput
(
name
.
step_scopes
,
"step scopes"
);
...
@@ -331,34 +316,39 @@ public:
...
@@ -331,34 +316,39 @@ public:
void
RecurrentGradientAlgorithm
::
Run
(
void
RecurrentGradientAlgorithm
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
auto
step_scopes
=
GetStepScopes
(
scope
);
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
);
rnn
::
SegmentInputs
(
PADDLE_ENFORCE
(
scope
.
FindVar
(
arg_
->
step_net
)
!=
nullptr
,
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
"step net is not in scope."
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
for
(
int
step_id
=
seq_len_
-
1
;
step_id
>=
0
;
--
step_id
)
{
for
(
int
step_id
=
seq_len_
-
1
;
step_id
>=
0
;
--
step_id
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len_
-
1
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len_
-
1
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
,
false
/*infer_shape_mode*/
);
}
}
net
->
GetMutable
<
NetOp
>
()
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
net
->
GetMutable
<
NetOp
>
()
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
}
LinkBootMemoryGradients
(
step_scopes
[
0
]);
LinkBootMemoryGradients
(
step_scopes
[
0
],
false
);
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len_
);
rnn
::
ConcatOutputs
(
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
false
/*infer_shape_mode*/
);
}
}
void
RecurrentGradientAlgorithm
::
LinkBootMemoryGradients
(
void
RecurrentGradientAlgorithm
::
LinkBootMemoryGradients
(
Scope
*
step_scope
)
const
{
Scope
*
step_scope
,
bool
infer_shape_mode
)
const
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
for
(
auto
&
attr
:
arg_
->
memories
)
{
Tensor
*
mem_grad
=
step_scope
->
NewVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
var
)
!=
nullptr
,
PADDLE_ENFORCE
(
mem_grad
!=
nullptr
,
"memory variable [%s] does not exists"
,
"boot_tensor should be retrieved before"
);
attr
.
var
);
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
"memory [%s]'s boot variable [%s] not exists"
,
"boot variable [%s] does not exists"
,
attr
.
var
,
attr
.
boot_var
);
attr
.
boot_var
);
Tensor
*
mem_grad
=
step_scope
->
NewVar
(
attr
.
var
)
->
GetMutable
<
Tensor
>
();
Tensor
*
boot_mem_grad
=
Tensor
*
boot_mem_grad
=
step_scope
->
NewVar
(
attr
.
boot_var
)
->
GetMutable
<
Tensor
>
();
step_scope
->
NewVar
(
attr
.
boot_var
)
->
GetMutable
<
Tensor
>
();
boot_mem_grad
->
ShareDataWith
<
float
>
(
*
mem_grad
);
if
(
infer_shape_mode
)
{
boot_mem_grad
->
Resize
(
mem_grad
->
dims
());
}
else
{
boot_mem_grad
->
ShareDataWith
<
float
>
(
*
mem_grad
);
}
}
}
}
}
...
@@ -367,34 +357,20 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
...
@@ -367,34 +357,20 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
->
GetMutable
<
Tensor
>
()
->
GetMutable
<
Tensor
>
()
->
dims
()[
0
];
->
dims
()[
0
];
auto
step_scopes
=
GetStepScopes
(
scope
);
auto
step_scopes
=
GetStepScopes
(
scope
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
);
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len_
,
true
/*infer_shape_mode*/
);
PADDLE_ENFORCE
(
scope
.
FindVar
(
arg_
->
step_net
)
!=
nullptr
,
"step net is not in scope."
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
Variable
*
net
=
scope
.
FindVar
(
arg_
->
step_net
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
PADDLE_ENFORCE
(
net
!=
nullptr
,
"failed to get step net"
);
for
(
int
step_id
=
seq_len_
-
1
;
step_id
>=
0
;
--
step_id
)
{
for
(
int
step_id
=
seq_len_
-
1
;
step_id
>=
0
;
--
step_id
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len_
-
1
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len_
-
1
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memories
,
step_id
,
1
,
true
/*infer_shape_mode*/
);
}
}
net
->
GetMutable
<
NetOp
>
()
->
InferShape
(
*
step_scopes
[
step_id
]);
net
->
GetMutable
<
NetOp
>
()
->
InferShape
(
*
step_scopes
[
step_id
]);
}
}
rnn
::
ConcatOutputs
(
auto
outlinks
=
arg_
->
outlinks
;
step_scopes
,
arg_
->
outlinks
,
seq_len_
,
true
/*infer_shape_mode*/
);
for
(
size_t
i
=
0
;
i
<
outlinks
.
size
();
i
++
)
{
LinkBootMemoryGradients
(
step_scopes
[
0
],
true
/*infer_shape_mode*/
);
DDim
step_dims
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
internal
)
->
GetMutable
<
Tensor
>
()
->
dims
();
std
::
vector
<
int
>
dims_vec
=
vectorize
(
step_dims
);
// now only support fixed length
dims_vec
.
insert
(
dims_vec
.
begin
(),
seq_len_
);
Tensor
*
output
=
step_scopes
[
0
]
->
FindVar
(
outlinks
[
i
].
external
)
->
GetMutable
<
Tensor
>
();
output
->
Resize
(
make_ddim
(
dims_vec
));
}
LinkBootMemoryGradients
(
step_scopes
[
0
]);
}
}
void
RecurrentGradientOp
::
Init
()
{
void
RecurrentGradientOp
::
Init
()
{
...
...
paddle/operators/recurrent_
network_
op.h
→
paddle/operators/recurrent_op.h
浏览文件 @
927adb4a
...
@@ -19,7 +19,7 @@
...
@@ -19,7 +19,7 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
framework
;
// NOLINT
namespace
rnn
{
namespace
rnn
{
...
@@ -72,26 +72,29 @@ struct ArgumentName {
...
@@ -72,26 +72,29 @@ struct ArgumentName {
*/
*/
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
void
SegmentInputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
Link
>&
inlinks
,
const
std
::
vector
<
Link
>&
inlinks
,
const
size_t
seq_len
);
const
size_t
seq_len
,
bool
infer_shape_mode
);
/**
/**
* Process outputs of step nets and merge to variables.
* Process outputs of step nets and merge to variables.
*/
*/
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
void
ConcatOutputs
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
Link
>&
outlinks
,
const
std
::
vector
<
Link
>&
outlinks
,
const
size_t
seq_len
);
const
size_t
seq_len
,
bool
infer_shape_mode
);
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
MemoryAttr
>&
memories
,
const
std
::
vector
<
MemoryAttr
>&
memories
,
size_t
step_id
,
const
size_t
step_id
,
int
offset
);
const
int
offset
,
bool
infer_shape_mode
);
void
InitArgument
(
const
ArgumentName
&
name
,
Argument
*
arg
);
void
InitArgument
(
const
ArgumentName
&
name
,
Argument
*
arg
);
};
// namespace rnn
};
// namespace rnn
// The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
// The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
// TODO:
// TODO
(Yan Chunwei)
:
// 1. No-padding computing for sequences with indifinite length in one batch.
// 1. No-padding computing for sequences with indifinite length in one batch.
// 2. Hierarchical RNN for sequence with sub-sequence.
// 2. Hierarchical RNN for sequence with sub-sequence.
// 3. Internal Memory.
// 3. Internal Memory.
...
@@ -122,7 +125,7 @@ protected:
...
@@ -122,7 +125,7 @@ protected:
return
*
scope
.
FindVar
(
arg_
->
step_scopes
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
return
*
scope
.
FindVar
(
arg_
->
step_scopes
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
}
}
void
InitMemories
(
Scope
*
step_scopes
)
const
;
void
InitMemories
(
Scope
*
step_scopes
,
bool
infer_shape_mode
)
const
;
private:
private:
std
::
unique_ptr
<
rnn
::
Argument
>
arg_
;
std
::
unique_ptr
<
rnn
::
Argument
>
arg_
;
...
@@ -145,7 +148,7 @@ public:
...
@@ -145,7 +148,7 @@ public:
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
;
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
;
void
LinkBootMemoryGradients
(
Scope
*
step_scopes
)
const
;
void
LinkBootMemoryGradients
(
Scope
*
step_scopes
,
bool
infer_shape_mode
)
const
;
/**
/**
* InferShape must be called before Run.
* InferShape must be called before Run.
...
@@ -169,12 +172,10 @@ public:
...
@@ -169,12 +172,10 @@ public:
/**
/**
* InferShape must be called before Run.
* InferShape must be called before Run.
*/
*/
virtual
void
InferShape
(
const
Scope
&
scope
)
const
override
{
void
InferShape
(
const
Scope
&
scope
)
const
override
{
alg_
.
InferShape
(
scope
);
}
alg_
.
InferShape
(
scope
);
}
v
irtual
v
oid
Run
(
const
Scope
&
scope
,
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
alg_
.
Run
(
scope
,
dev_ctx
);
alg_
.
Run
(
scope
,
dev_ctx
);
}
}
...
@@ -191,12 +192,10 @@ public:
...
@@ -191,12 +192,10 @@ public:
/**
/**
* InferShape must be called before Run.
* InferShape must be called before Run.
*/
*/
virtual
void
InferShape
(
const
Scope
&
scope
)
const
override
{
void
InferShape
(
const
Scope
&
scope
)
const
override
{
alg_
.
InferShape
(
scope
);
}
alg_
.
InferShape
(
scope
);
}
v
irtual
v
oid
Run
(
const
Scope
&
scope
,
void
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
{
alg_
.
Run
(
scope
,
dev_ctx
);
alg_
.
Run
(
scope
,
dev_ctx
);
}
}
...
...
paddle/operators/recurrent_
network_
op_test.cc
→
paddle/operators/recurrent_op_test.cc
浏览文件 @
927adb4a
...
@@ -11,14 +11,15 @@
...
@@ -11,14 +11,15 @@
limitations under the License.
limitations under the License.
*/
*/
#include "paddle/operators/recurrent_op.h"
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/
recurrent_network
_op.h"
#include "paddle/operators/
net
_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -55,7 +56,7 @@ protected:
...
@@ -55,7 +56,7 @@ protected:
w
->
GetMutable
<
Tensor
>
()
->
mutable_data
<
float
>
(
w
->
GetMutable
<
Tensor
>
()
->
mutable_data
<
float
>
(
make_ddim
(
std
::
vector
<
int
>
{
30
,
30
}),
platform
::
CPUPlace
());
make_ddim
(
std
::
vector
<
int
>
{
30
,
30
}),
platform
::
CPUPlace
());
for
(
auto
boot
:
std
::
vector
<
std
::
string
>
{
"
x_boot"
,
"
h_boot"
})
{
for
(
auto
boot
:
std
::
vector
<
std
::
string
>
{
"h_boot"
})
{
LOG
(
INFO
)
<<
"create global variable "
<<
boot
;
LOG
(
INFO
)
<<
"create global variable "
<<
boot
;
Variable
*
h_boot
=
scope_
.
NewVar
(
boot
);
Variable
*
h_boot
=
scope_
.
NewVar
(
boot
);
h_boot
->
GetMutable
<
Tensor
>
()
->
mutable_data
<
float
>
(
h_boot
->
GetMutable
<
Tensor
>
()
->
mutable_data
<
float
>
(
...
@@ -79,7 +80,6 @@ protected:
...
@@ -79,7 +80,6 @@ protected:
op_desc
.
add_inputs
(
"x0"
);
op_desc
.
add_inputs
(
"x0"
);
op_desc
.
add_inputs
(
"x1"
);
op_desc
.
add_inputs
(
"x1"
);
// boot_memories 3
// boot_memories 3
op_desc
.
add_inputs
(
"x_boot"
);
op_desc
.
add_inputs
(
"h_boot"
);
op_desc
.
add_inputs
(
"h_boot"
);
// step net 5
// step net 5
op_desc
.
add_inputs
(
"step_net"
);
op_desc
.
add_inputs
(
"step_net"
);
...
@@ -91,7 +91,7 @@ protected:
...
@@ -91,7 +91,7 @@ protected:
auto
_input_format
=
std
::
vector
<
int
>
{
auto
_input_format
=
std
::
vector
<
int
>
{
0
,
// in_link
0
,
// in_link
3
,
// memories
3
,
// memories
5
// step_net
4
// step_net
};
};
auto
input_format
=
op_desc
.
add_attrs
();
auto
input_format
=
op_desc
.
add_attrs
();
input_format
->
set_name
(
"input_format"
);
input_format
->
set_name
(
"input_format"
);
...
@@ -129,12 +129,11 @@ protected:
...
@@ -129,12 +129,11 @@ protected:
inlink_alias
->
add_strings
(
item
);
inlink_alias
->
add_strings
(
item
);
}
}
// pre memories
// pre memories
for
(
const
auto
&
item
:
for
(
const
auto
&
item
:
std
::
vector
<
std
::
string
>
{
"rnn/h@pre"
})
{
std
::
vector
<
std
::
string
>
{
"rnn/x@pre"
,
"rnn/h@pre"
})
{
pre_memories
->
add_strings
(
item
);
pre_memories
->
add_strings
(
item
);
}
}
// memories
// memories
for
(
const
auto
&
item
:
std
::
vector
<
std
::
string
>
{
"rnn/
x"
,
"rnn/
h"
})
{
for
(
const
auto
&
item
:
std
::
vector
<
std
::
string
>
{
"rnn/h"
})
{
memories
->
add_strings
(
item
);
memories
->
add_strings
(
item
);
}
}
// output alias
// output alias
...
@@ -151,14 +150,11 @@ protected:
...
@@ -151,14 +150,11 @@ protected:
LOG
(
INFO
)
<<
"create variable step_net"
;
LOG
(
INFO
)
<<
"create variable step_net"
;
Variable
*
var
=
scope_
.
NewVar
(
"step_net"
);
Variable
*
var
=
scope_
.
NewVar
(
"step_net"
);
auto
net
=
var
->
GetMutable
<
NetOp
>
();
auto
net
=
var
->
GetMutable
<
NetOp
>
();
// rnn/s is net's input or output?
net
->
inputs_
=
{
"rnn/h@pre"
,
"rnn/w"
,
"rnn/x"
};
net
->
inputs_
=
{
"rnn/s"
,
"rnn/h"
};
net
->
AddOp
(
net
->
AddOp
(
OpRegistry
::
CreateOp
(
"mul"
,
{
"rnn/h@pre"
,
"rnn/w"
},
{
"rnn/s"
},
{}));
OpRegistry
::
CreateOp
(
"mul"
,
{
"rnn/h@pre"
,
"rnn/w"
},
{
"rnn/s"
},
{}));
net
->
AddOp
(
net
->
AddOp
(
OpRegistry
::
CreateOp
(
"add_two"
,
{
"
rnn/x
"
,
"rnn/s"
},
{
"rnn/h"
},
{}));
OpRegistry
::
CreateOp
(
"add_two"
,
{
"
x@alias
"
,
"rnn/s"
},
{
"rnn/h"
},
{}));
net
->
CompleteAddOp
();
net
->
CompleteAddOp
();
}
}
...
@@ -297,7 +293,10 @@ protected:
...
@@ -297,7 +293,10 @@ protected:
inlink
.
internal
=
"rnn/x"
;
inlink
.
internal
=
"rnn/x"
;
auto
step_scopes
=
auto
step_scopes
=
scope_
.
FindVar
(
"step_scopes"
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
scope_
.
FindVar
(
"step_scopes"
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
rnn
::
SegmentInputs
(
*
step_scopes
,
std
::
vector
<
rnn
::
Link
>
{
inlink
},
10
);
rnn
::
SegmentInputs
(
*
step_scopes
,
std
::
vector
<
rnn
::
Link
>
{
inlink
},
10
,
true
/*infer_shape_mode*/
);
}
}
void
LinkeMemories
()
{
void
LinkeMemories
()
{
...
@@ -311,7 +310,8 @@ protected:
...
@@ -311,7 +310,8 @@ protected:
auto
step_scopes
=
auto
step_scopes
=
scope_
.
FindVar
(
"step_scopes"
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
scope_
.
FindVar
(
"step_scopes"
)
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
for
(
int
i
=
1
;
i
<
10
;
++
i
)
{
for
(
int
i
=
1
;
i
<
10
;
++
i
)
{
rnn
::
LinkMemories
(
*
step_scopes
,
memories
,
i
,
-
1
);
rnn
::
LinkMemories
(
*
step_scopes
,
memories
,
i
,
-
1
,
true
/*infer_shape_mode*/
);
}
}
}
}
...
@@ -333,14 +333,14 @@ TEST(RecurrentOp, LinkMemories) {
...
@@ -333,14 +333,14 @@ TEST(RecurrentOp, LinkMemories) {
using
namespace
paddle
::
operators
;
using
namespace
paddle
::
operators
;
// create and init step scopes
// create and init step scopes
in
t
len
=
10
;
size_
t
len
=
10
;
std
::
vector
<
Scope
*>
step_scopes
;
std
::
vector
<
Scope
*>
step_scopes
;
for
(
in
t
i
=
0
;
i
<
len
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
len
;
++
i
)
{
auto
scope
=
new
Scope
();
auto
scope
=
new
Scope
();
scope
->
NewVar
(
"pre_h"
);
scope
->
NewVar
(
"pre_h"
);
auto
tensor
=
scope
->
NewVar
(
"h"
)
->
GetMutable
<
Tensor
>
();
auto
tensor
=
scope
->
NewVar
(
"h"
)
->
GetMutable
<
Tensor
>
();
float
*
data
=
tensor
->
mutable_data
<
float
>
({
15
,
20
},
CPUPlace
());
float
*
data
=
tensor
->
mutable_data
<
float
>
({
15
,
20
},
CPUPlace
());
for
(
in
t
j
=
0
;
j
<
15
*
20
;
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
15
*
20
;
++
j
)
{
data
[
j
]
=
rand
()
*
(
1.
/
(
double
)
RAND_MAX
);
data
[
j
]
=
rand
()
*
(
1.
/
(
double
)
RAND_MAX
);
}
}
step_scopes
.
push_back
(
scope
);
step_scopes
.
push_back
(
scope
);
...
@@ -354,24 +354,24 @@ TEST(RecurrentOp, LinkMemories) {
...
@@ -354,24 +354,24 @@ TEST(RecurrentOp, LinkMemories) {
std
::
vector
<
rnn
::
MemoryAttr
>
memories
;
std
::
vector
<
rnn
::
MemoryAttr
>
memories
;
memories
.
push_back
(
mem_attr
);
memories
.
push_back
(
mem_attr
);
for
(
in
t
i
=
1
;
i
<
len
;
++
i
)
{
for
(
size_
t
i
=
1
;
i
<
len
;
++
i
)
{
rnn
::
LinkMemories
(
step_scopes
,
memories
,
i
,
-
1
);
rnn
::
LinkMemories
(
step_scopes
,
memories
,
i
,
-
1
,
false
/*infer_shape_mode*/
);
}
}
// check
// check
for
(
in
t
i
=
0
;
i
<
len
-
1
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
len
-
1
;
++
i
)
{
const
float
*
a
=
const
float
*
a
=
step_scopes
[
i
]
->
FindVar
(
"h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
step_scopes
[
i
]
->
FindVar
(
"h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
const
float
*
b
=
step_scopes
[
i
+
1
]
const
float
*
b
=
step_scopes
[
i
+
1
]
->
FindVar
(
"pre_h"
)
->
FindVar
(
"pre_h"
)
->
GetMutable
<
Tensor
>
()
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
->
data
<
float
>
();
for
(
size_t
i
=
0
;
i
<
15
*
20
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
15
*
20
;
++
j
)
{
ASSERT_FLOAT_EQ
(
a
[
i
],
b
[
i
]);
ASSERT_FLOAT_EQ
(
a
[
j
],
b
[
j
]);
}
}
}
}
for
(
int
i
=
len
-
2
;
i
>=
0
;
--
i
)
{
for
(
int
i
=
len
-
2
;
i
>=
0
;
--
i
)
{
rnn
::
LinkMemories
(
step_scopes
,
memories
,
i
,
1
);
rnn
::
LinkMemories
(
step_scopes
,
memories
,
i
,
1
,
false
/*infer_shape_mode*/
);
}
}
// check
// check
for
(
int
i
=
len
-
2
;
i
>=
0
;
--
i
)
{
for
(
int
i
=
len
-
2
;
i
>=
0
;
--
i
)
{
...
@@ -379,8 +379,8 @@ TEST(RecurrentOp, LinkMemories) {
...
@@ -379,8 +379,8 @@ TEST(RecurrentOp, LinkMemories) {
step_scopes
[
i
]
->
FindVar
(
"pre_h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
step_scopes
[
i
]
->
FindVar
(
"pre_h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
const
float
*
b
=
const
float
*
b
=
step_scopes
[
i
+
1
]
->
FindVar
(
"h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
step_scopes
[
i
+
1
]
->
FindVar
(
"h"
)
->
GetMutable
<
Tensor
>
()
->
data
<
float
>
();
for
(
size_t
i
=
0
;
i
<
15
*
20
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
15
*
20
;
++
j
)
{
ASSERT_FLOAT_EQ
(
a
[
i
],
b
[
i
]);
ASSERT_FLOAT_EQ
(
a
[
j
],
b
[
j
]);
}
}
}
}
...
@@ -391,9 +391,3 @@ TEST(RecurrentOp, LinkMemories) {
...
@@ -391,9 +391,3 @@ TEST(RecurrentOp, LinkMemories) {
USE_OP
(
add_two
);
USE_OP
(
add_two
);
USE_OP
(
mul
);
USE_OP
(
mul
);
// int main() {
// //! TODO(yuyang18): Temporary disable this unit-test because implementation
// //! error.
// return 0;
//}
\ No newline at end of file
paddle/operators/rowwise_add_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h"
#include "paddle/operators/rowwise_add_op.h"
...
...
paddle/operators/sgd_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/sgd_op.h"
#include "paddle/operators/sgd_op.h"
REGISTER_OP_GPU_KERNEL
(
sgd
,
ops
::
SGDOpKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
sgd
,
ops
::
SGDOpKernel
<
ops
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/sgd_op.h
浏览文件 @
927adb4a
...
@@ -29,8 +29,12 @@ public:
...
@@ -29,8 +29,12 @@ public:
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
param_out
).
device
(
ctx
.
GetEigenDevice
<
Place
>
())
=
auto
p
=
EigenVector
<
T
>::
Flatten
(
*
param
);
EigenVector
<
T
>::
Flatten
(
*
param
)
-
lr
*
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
g
=
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
o
=
EigenVector
<
T
>::
Flatten
(
*
param_out
);
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
o
.
device
(
place
)
=
p
-
lr
*
g
;
}
}
};
};
...
...
paddle/operators/sigmoid_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h"
#include "paddle/operators/sigmoid_op.h"
...
...
paddle/operators/sigmoid_op.h
浏览文件 @
927adb4a
...
@@ -27,8 +27,11 @@ public:
...
@@ -27,8 +27,11 @@ public:
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
output
).
device
(
context
.
GetEigenDevice
<
Place
>
())
=
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input
);
1.0
/
(
1.0
+
(
-
1.0
*
EigenVector
<
T
>::
Flatten
(
*
input
)).
exp
());
auto
Y
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Y
.
device
(
place
)
=
1.0
/
(
1.0
+
(
-
1.0
*
X
).
exp
());
}
}
};
};
}
// namespace operators
}
// namespace operators
...
...
paddle/operators/softmax_op.cc
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/softmax_op.h"
#include "paddle/operators/softmax_op.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -19,12 +20,13 @@ namespace operators {
...
@@ -19,12 +20,13 @@ namespace operators {
class
SoftmaxOp
:
public
OperatorWithKernel
{
class
SoftmaxOp
:
public
OperatorWithKernel
{
protected:
protected:
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
.
InputSize
()
==
1
,
"Only one input is need for softmax"
);
PADDLE_ENFORCE
(
ctx
.
InputSize
()
==
1UL
,
PADDLE_ENFORCE
(
ctx
.
Input
<
Tensor
>
(
0
)
->
dims
().
size
()
==
2
,
"Only one input is need for softmax"
);
PADDLE_ENFORCE
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
dims
().
size
()
==
2UL
,
"The input of softmax op must be matrix"
);
"The input of softmax op must be matrix"
);
PADDLE_ENFORCE
(
ctx
.
OutputSize
()
==
1
,
PADDLE_ENFORCE
(
ctx
.
OutputSize
()
==
1
UL
,
"Only one output is need for softmax"
);
"Only one output is need for softmax"
);
ctx
.
Output
<
Tensor
>
(
0
)
->
Resize
(
ctx
.
Input
<
Tensor
>
(
0
)
->
dims
());
ctx
.
Output
<
Tensor
>
(
"Y"
)
->
Resize
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
dims
());
}
}
};
};
...
@@ -40,10 +42,19 @@ public:
...
@@ -40,10 +42,19 @@ public:
class
SoftmaxOpGrad
:
public
OperatorWithKernel
{
class
SoftmaxOpGrad
:
public
OperatorWithKernel
{
protected:
protected:
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{}
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{
std
::
string
DebugString
()
const
override
{
PADDLE_ENFORCE
(
ctx
.
InputSize
()
==
3UL
,
LOG
(
INFO
)
<<
"SoftmaxOpGrad"
;
"Input of SoftmaxOpGrad should be 3, X, Y, YG"
);
return
""
;
PADDLE_ENFORCE
(
ctx
.
OutputSize
()
==
1UL
,
"Output of SoftmaxOpGrad should be 1"
);
PADDLE_ENFORCE
(
ctx
.
InputVar
(
"Y"
)
!=
nullptr
,
"Input(Y) should not be null"
);
PADDLE_ENFORCE
(
ctx
.
InputVar
(
GRAD_VAR_NAME
(
"Y"
))
!=
nullptr
,
"Input(Y@GRAD) should not be null"
);
PADDLE_ENFORCE
(
ctx
.
Input
<
Tensor
>
(
"Y"
)
->
dims
()
==
ctx
.
Input
<
Tensor
>
(
GRAD_VAR_NAME
(
"Y"
))
->
dims
(),
"the shape of Input(0) and Input(1) should be the same"
);
ctx
.
Output
<
Tensor
>
(
GRAD_VAR_NAME
(
"X"
))
->
Resize
(
ctx
.
Input
<
Tensor
>
(
"Y"
)
->
dims
());
}
}
};
};
...
@@ -51,5 +62,7 @@ protected:
...
@@ -51,5 +62,7 @@ protected:
}
// namespace paddle
}
// namespace paddle
REGISTER_OP
(
softmax
,
ops
::
SoftmaxOp
,
ops
::
SoftmaxOpMaker
);
REGISTER_OP
(
softmax
,
ops
::
SoftmaxOp
,
ops
::
SoftmaxOpMaker
);
REGISTER_GRADIENT_OP
(
softmax
,
softmax_grad
,
ops
::
SoftmaxOpGrad
);
REGISTER_OP_CPU_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
ops
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
ops
::
CPUPlace
,
float
>
);
REGISTER_GRADIENT_OP
(
softmax
,
softmax_grad
,
ops
::
SoftmaxOpGrad
);
REGISTER_OP_CPU_KERNEL
(
softmax_grad
,
ops
::
SoftmaxGradKernel
<
ops
::
CPUPlace
,
float
>
);
paddle/operators/softmax_op.cu
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h"
#include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
ops
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
softmax_grad
,
ops
::
SoftmaxGradKernel
<
ops
::
GPUPlace
,
float
>
);
paddle/operators/softmax_op.h
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include "paddle/framework/ddim.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/type_alias.h"
#include "paddle/operators/type_alias.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -23,8 +26,8 @@ template <typename Place, typename T>
...
@@ -23,8 +26,8 @@ template <typename Place, typename T>
class
SoftmaxKernel
:
public
OpKernel
{
class
SoftmaxKernel
:
public
OpKernel
{
public:
public:
void
Compute
(
const
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
ExecutionContext
&
context
)
const
override
{
auto
input
=
context
.
Input
<
Tensor
>
(
0
);
auto
input
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
auto
output
=
context
.
Output
<
Tensor
>
(
"Y"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
input
);
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
input
);
...
@@ -57,5 +60,38 @@ public:
...
@@ -57,5 +60,38 @@ public:
.
broadcast
(
one_by_class
));
.
broadcast
(
one_by_class
));
}
}
};
};
template
<
typename
Place
,
typename
T
>
class
SoftmaxGradKernel
:
public
OpKernel
{
public:
void
Compute
(
const
ExecutionContext
&
context
)
const
override
{
std
::
shared_ptr
<
Tensor
>
scale_
=
std
::
make_shared
<
Tensor
>
();
auto
Y
=
context
.
Input
<
Tensor
>
(
"Y"
);
auto
dY
=
context
.
Input
<
Tensor
>
(
OperatorBase
::
GRAD_VAR_NAME
(
"Y"
));
auto
dX
=
context
.
Output
<
Tensor
>
(
OperatorBase
::
GRAD_VAR_NAME
(
"X"
));
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
const
int
batch_size
=
Y
->
dims
()[
0
];
const
int
class_num
=
Y
->
dims
()[
1
];
Eigen
::
DSizes
<
int
,
1
>
along_class
(
1
);
Eigen
::
DSizes
<
int
,
2
>
batch_by_one
(
batch_size
,
1
);
Eigen
::
DSizes
<
int
,
2
>
one_by_class
(
1
,
class_num
);
auto
Y_eigen
=
EigenMatrix
<
T
>::
From
(
*
Y
);
auto
dY_eigen
=
EigenMatrix
<
T
>::
From
(
*
dY
);
auto
dX_eigen
=
EigenMatrix
<
T
>::
From
(
*
dX
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
auto
dot
=
(
Y_eigen
*
dY_eigen
)
.
sum
(
along_class
)
.
eval
()
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
);
dX_eigen
.
device
(
place
)
=
(
dY_eigen
-
dot
)
*
Y_eigen
;
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/operators/type_alias.h
浏览文件 @
927adb4a
...
@@ -15,13 +15,14 @@
...
@@ -15,13 +15,14 @@
#pragma once
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
OpKernel
=
framework
::
OpKernel
;
using
OpKernel
=
framework
::
OpKernel
;
using
OperatorBase
=
framework
::
OperatorBase
;
using
InferShapeContext
=
framework
::
InferShapeContext
;
using
InferShapeContext
=
framework
::
InferShapeContext
;
using
ExecutionContext
=
framework
::
ExecutionContext
;
using
ExecutionContext
=
framework
::
ExecutionContext
;
using
Variable
=
framework
::
Variable
;
using
Variable
=
framework
::
Variable
;
...
@@ -43,14 +44,16 @@ template <typename T,
...
@@ -43,14 +44,16 @@ template <typename T,
typename
IndexType
=
Eigen
::
DenseIndex
>
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenTensor
=
framework
::
EigenTensor
<
T
,
D
,
MajorType
,
IndexType
>
;
using
EigenTensor
=
framework
::
EigenTensor
<
T
,
D
,
MajorType
,
IndexType
>
;
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
using
Scope
=
framework
::
Scope
;
using
OperatorWithKernel
=
framework
::
OperatorWithKernel
;
using
OperatorWithKernel
=
framework
::
OperatorWithKernel
;
using
OperatorBase
=
framework
::
OperatorBase
;
using
OpProtoAndCheckerMaker
=
framework
::
OpProtoAndCheckerMaker
;
using
OpProtoAndCheckerMaker
=
framework
::
OpProtoAndCheckerMaker
;
using
OpProto
=
framework
::
OpProto
;
using
OpProto
=
framework
::
OpProto
;
using
OpAttrChecker
=
framework
::
OpAttrChecker
;
using
OpAttrChecker
=
framework
::
OpAttrChecker
;
using
CPUPlace
=
platform
::
CPUPlace
;
using
CPUPlace
=
platform
::
CPUPlace
;
using
GPUPlace
=
platform
::
GPUPlace
;
using
GPUPlace
=
platform
::
GPUPlace
;
using
NetOp
=
framework
::
NetOp
;
using
OpRegistry
=
framework
::
OpRegistry
;
using
OpRegistry
=
framework
::
OpRegistry
;
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
...
paddle/platform/device_context.h
浏览文件 @
927adb4a
...
@@ -40,7 +40,7 @@ class DeviceContext {
...
@@ -40,7 +40,7 @@ class DeviceContext {
class
CPUDeviceContext
:
public
DeviceContext
{
class
CPUDeviceContext
:
public
DeviceContext
{
public:
public:
CPUDeviceContext
();
CPUDeviceContext
();
CPUDeviceContext
(
CPUPlace
);
explicit
CPUDeviceContext
(
CPUPlace
);
virtual
~
CPUDeviceContext
()
{}
virtual
~
CPUDeviceContext
()
{}
Eigen
::
DefaultDevice
*
eigen_device
()
const
;
Eigen
::
DefaultDevice
*
eigen_device
()
const
;
...
@@ -55,7 +55,7 @@ class CPUDeviceContext : public DeviceContext {
...
@@ -55,7 +55,7 @@ class CPUDeviceContext : public DeviceContext {
class
CUDADeviceContext
:
public
DeviceContext
{
class
CUDADeviceContext
:
public
DeviceContext
{
public:
public:
explicit
CUDADeviceContext
(
GPUPlace
);
CUDADeviceContext
(
GPUPlace
);
// NOLINT
virtual
~
CUDADeviceContext
();
virtual
~
CUDADeviceContext
();
/*! \brief Wait for all operations completion in the stream. */
/*! \brief Wait for all operations completion in the stream. */
...
@@ -69,10 +69,10 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -69,10 +69,10 @@ class CUDADeviceContext : public DeviceContext {
// clang-format off
// clang-format off
/*! \brief Return cublas handle in the device context. */
/*! \brief Return cublas handle in the device context. */
cublasHandle_t
cublas_handle
();
cublasHandle_t
cublas_handle
();
/*! \brief Return cudnn handle in the device context. */
/*! \brief Return cudnn handle in the device context. */
cudnnHandle_t
cudnn_handle
();
cudnnHandle_t
cudnn_handle
();
/*! \brief Return curand handle in the device context. */
/*! \brief Return curand handle in the device context. */
curandGenerator_t
curand_generator
();
curandGenerator_t
curand_generator
();
...
...
paddle/platform/dynload/cublas.cc
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/cublas.h>
#include <paddle/platform/dynload/cublas.h>
namespace
paddle
{
namespace
paddle
{
...
...
paddle/platform/dynload/cudnn.cc
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/cudnn.h>
#include <paddle/platform/dynload/cudnn.h>
namespace
paddle
{
namespace
paddle
{
...
@@ -25,4 +39,4 @@ CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
...
@@ -25,4 +39,4 @@ CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
}
// namespace dynload
}
// namespace dynload
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
\ No newline at end of file
paddle/platform/dynload/curand.cc
浏览文件 @
927adb4a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/curand.h>
#include <paddle/platform/dynload/curand.h>
namespace
paddle
{
namespace
paddle
{
...
@@ -10,6 +24,7 @@ void *curand_dso_handle;
...
@@ -10,6 +24,7 @@ void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH
(
DEFINE_WRAP
);
CURAND_RAND_ROUTINE_EACH
(
DEFINE_WRAP
);
}
}
}
// namespace dynload
}
}
// namespace platform
\ No newline at end of file
}
// namespace paddle
paddle/platform/place.h
浏览文件 @
927adb4a
...
@@ -32,7 +32,7 @@ struct CPUPlace {
...
@@ -32,7 +32,7 @@ struct CPUPlace {
struct
GPUPlace
{
struct
GPUPlace
{
GPUPlace
()
:
GPUPlace
(
0
)
{}
GPUPlace
()
:
GPUPlace
(
0
)
{}
GPUPlace
(
int
d
)
:
device
(
d
)
{}
GPUPlace
(
int
d
)
:
device
(
d
)
{}
// NOLINT
// needed for variant equality comparison
// needed for variant equality comparison
inline
bool
operator
==
(
const
GPUPlace
&
o
)
const
{
return
device
==
o
.
device
;
}
inline
bool
operator
==
(
const
GPUPlace
&
o
)
const
{
return
device
==
o
.
device
;
}
...
...
paddle/pybind/CMakeLists.txt
浏览文件 @
927adb4a
...
@@ -6,4 +6,4 @@ cc_library(paddle_pybind SHARED
...
@@ -6,4 +6,4 @@ cc_library(paddle_pybind SHARED
add_op
add_op
mean_op
mean_op
cross_entropy_op
cross_entropy_op
recurrent_
network_
op
)
recurrent_op
)
paddle/scripts/docker/build.sh
浏览文件 @
927adb4a
...
@@ -69,7 +69,7 @@ cat <<EOF
...
@@ -69,7 +69,7 @@ cat <<EOF
Installing ...
Installing ...
========================================
========================================
EOF
EOF
make
install
make
install
-j
`
nproc
`
pip
install
/usr/local/opt/paddle/share/wheels/
*
.whl
pip
install
/usr/local/opt/paddle/share/wheels/
*
.whl
paddle version
paddle version
...
@@ -122,7 +122,7 @@ cat <<EOF
...
@@ -122,7 +122,7 @@ cat <<EOF
Generating .deb package ...
Generating .deb package ...
========================================
========================================
EOF
EOF
cpack
-D
CPACK_GENERATOR
=
'DEB'
..
cpack
-D
CPACK_GENERATOR
=
'DEB'
-j
`
nproc
`
..
cat
<<
EOF
cat
<<
EOF
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
927adb4a
...
@@ -6,14 +6,14 @@ mkdir -p $TRAVIS_BUILD_DIR/build
...
@@ -6,14 +6,14 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile paddle binaries first
# Compile paddle binaries first
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_DOC
=
OFF
-DWITH_GOLANG
=
ON
-DWITH_STYLE_CHECK
=
OFF
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_DOC
=
OFF
-DWITH_
MKLDNN
=
OFF
-DWITH_MKLML
=
OFF
-DWITH_
GOLANG
=
ON
-DWITH_STYLE_CHECK
=
OFF
mkdir
output
mkdir
output
make
-j
`
nproc
`
make
-j
`
nproc
`
find ..
-name
'*whl'
| xargs pip
install
# install all wheels.
find ..
-name
'*whl'
| xargs pip
install
# install all wheels.
rm
-rf
*
rm
-rf
*
# Compile Documentation only.
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_
MKLDNN
=
OFF
-DWITH_MKLML
=
OFF
-DWITH_
DOC
=
ON
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
# check websites for broken links
# check websites for broken links
...
...
paddle/string/piece.h
浏览文件 @
927adb4a
...
@@ -39,8 +39,8 @@ public:
...
@@ -39,8 +39,8 @@ public:
// size_ is 0.
// size_ is 0.
Piece
();
Piece
();
Piece
(
const
char
*
d
,
size_t
n
);
Piece
(
const
char
*
d
,
size_t
n
);
Piece
(
const
char
*
d
);
Piece
(
const
char
*
d
);
// NOLINT
Piece
(
const
std
::
string
&
s
);
Piece
(
const
std
::
string
&
s
);
// NOLINT
const
char
*
data
()
const
{
return
data_
;
}
const
char
*
data
()
const
{
return
data_
;
}
size_t
len
()
const
{
return
size_
;
}
size_t
len
()
const
{
return
size_
;
}
...
...
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
浏览文件 @
927adb4a
...
@@ -7,7 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
...
@@ -7,7 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
,
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer
)
test_kmax_seq_socre_layer
)
export
whole_configs
=(
test_split_datasource
)
export
whole_configs
=(
test_split_datasource
)
python/paddle/v2/framework/tests/CMakeLists.txt
浏览文件 @
927adb4a
...
@@ -13,4 +13,5 @@ add_python_test(test_framework
...
@@ -13,4 +13,5 @@ add_python_test(test_framework
test_sigmoid_op.py
test_sigmoid_op.py
test_softmax_op.py
test_softmax_op.py
test_rowwise_add_op.py
test_rowwise_add_op.py
test_network.py
)
test_network.py
gradient_checker.py
)
python/paddle/v2/framework/tests/gradient_checker.py
0 → 100644
浏览文件 @
927adb4a
import
paddle.v2.framework.core
as
core
from
paddle.v2.framework.create_op_creation_methods
import
op_creations
import
numpy
import
unittest
__all__
=
[
'get_numeric_gradient'
]
def
get_numeric_gradient
(
op
,
input_values
,
output_name
,
input_to_check
,
delta
=
1e-2
,
local_scope
=
None
):
"""
Get Numeric Gradient for an operator's input.
:param op: C++ operator instance, could be an network
:param input_values: The input variables. Should be an dictionary, key is
variable name. Value is numpy array.
:param output_name: The final output variable name.
:param input_to_check: The input variable need to get gradient.
:param delta: The perturbation value for numeric gradient method. The
smaller delta is, the more accurate result will get. But if that delta is
too small, it could occur numerical stability problem.
:param local_scope: The local scope used for get_numeric_gradient.
:return: The gradient array in numpy format.
"""
if
local_scope
is
None
:
local_scope
=
core
.
Scope
()
# Create all input variable in local_scope
for
var_name
in
input_values
:
var
=
local_scope
.
new_var
(
var_name
)
tensor
=
var
.
get_tensor
()
tensor
.
set_dims
(
input_values
[
var_name
].
shape
)
tensor
.
alloc_float
(
core
.
CPUPlace
())
tensor
.
set
(
input_values
[
var_name
],
core
.
CPUPlace
())
# Create all output variable in local_scope
for
output
in
op
.
outputs
():
if
local_scope
.
find_var
(
output
)
is
None
:
local_scope
.
new_var
(
output
).
get_tensor
()
op
.
infer_shape
(
local_scope
)
# allocate output memory
for
output
in
op
.
outputs
():
local_scope
.
find_var
(
output
).
get_tensor
().
alloc_float
(
core
.
CPUPlace
())
# TODO(yuyang18): Only CPU is support now.
cpu_ctx
=
core
.
DeviceContext
.
create
(
core
.
CPUPlace
())
def
get_output
():
op
.
run
(
local_scope
,
cpu_ctx
)
return
numpy
.
array
(
local_scope
.
find_var
(
output_name
).
get_tensor
()).
sum
()
def
product
(
dim
):
return
reduce
(
lambda
a
,
b
:
a
*
b
,
dim
,
1
)
tensor_to_check
=
local_scope
.
find_var
(
input_to_check
).
get_tensor
()
tensor_size
=
product
(
tensor_to_check
.
get_dims
())
gradient_flat
=
numpy
.
zeros
(
shape
=
(
tensor_size
,
),
dtype
=
'float32'
)
for
i
in
xrange
(
tensor_size
):
origin
=
tensor_to_check
.
get_float_element
(
i
)
x_pos
=
origin
+
delta
tensor_to_check
.
set_float_element
(
i
,
x_pos
)
y_pos
=
get_output
()
x_neg
=
origin
-
delta
tensor_to_check
.
set_float_element
(
i
,
x_neg
)
y_neg
=
get_output
()
tensor_to_check
.
set_float_element
(
i
,
origin
)
# restore old value
gradient_flat
[
i
]
=
(
y_pos
-
y_neg
)
/
delta
/
2
return
gradient_flat
.
reshape
(
tensor_to_check
.
get_dims
())
if
__name__
==
'__main__'
:
class
GetNumericGradientTest
(
unittest
.
TestCase
):
def
test_add_op
(
self
):
add_op
=
op_creations
.
add_two
(
X
=
"X"
,
Y
=
"Y"
,
Out
=
"Z"
)
x
=
numpy
.
random
.
random
((
10
,
1
)).
astype
(
"float32"
)
y
=
numpy
.
random
.
random
((
10
,
1
)).
astype
(
"float32"
)
arr
=
get_numeric_gradient
(
add_op
,
{
'X'
:
x
,
"Y"
:
y
},
'Z'
,
'X'
)
self
.
assertAlmostEqual
(
arr
.
mean
(),
1.0
,
delta
=
1e-2
)
unittest
.
main
()
python/paddle/v2/framework/tests/test_softmax_op.py
浏览文件 @
927adb4a
import
unittest
import
unittest
from
op_test_util
import
OpTestMeta
import
numpy
as
np
import
numpy
as
np
import
paddle.v2.framework.core
as
core
import
paddle.v2.framework.create_op_creation_methods
as
creation
from
op_test_util
import
OpTestMeta
def
stable_softmax
(
x
):
def
stable_softmax
(
x
):
...
@@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase):
...
@@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase):
self
.
Y
=
np
.
apply_along_axis
(
stable_softmax
,
1
,
self
.
X
)
self
.
Y
=
np
.
apply_along_axis
(
stable_softmax
,
1
,
self
.
X
)
class
TestSoftmaxGradOp
(
unittest
.
TestCase
):
def
test_softmax_grad
(
self
):
op
=
creation
.
op_creations
.
softmax
(
X
=
"X"
,
Y
=
"Y"
)
backward_op
=
core
.
Operator
.
backward
(
op
,
set
())
self
.
assertEqual
(
backward_op
.
type
(),
"softmax_grad"
)
expected
=
'''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).'''
self
.
assertEqual
(
expected
,
str
(
backward_op
))
batch_size
=
3
class_num
=
5
# Initialize X and add 1e-2 for numerical stability
Y
=
np
.
random
.
rand
(
batch_size
,
class_num
).
astype
(
np
.
float32
)
Y
=
Y
+
1e-2
dY
=
np
.
random
.
rand
(
batch_size
,
class_num
).
astype
(
np
.
float32
)
# Reference implementation of cross entropy with soft labels
def
label_softmax_grad
(
Y
,
dY
):
dX
=
Y
*
0.0
for
i
in
range
(
batch_size
):
d
=
np
.
dot
(
Y
[
i
,
:],
dY
[
i
,
:])
dX
[
i
,
:]
=
Y
[
i
,
:]
*
(
dY
[
i
,
:]
-
d
)
return
dX
expected
=
label_softmax_grad
(
Y
,
dY
)
scope
=
core
.
Scope
()
places
=
[]
places
.
append
(
core
.
CPUPlace
())
if
core
.
is_compile_gpu
():
places
.
append
(
core
.
GPUPlace
(
0
))
for
place
in
places
:
y
=
scope
.
new_var
(
"Y"
)
y_tensor
=
y
.
get_tensor
()
y_tensor
.
set_dims
([
batch_size
,
class_num
])
y_tensor
.
alloc_float
(
place
)
y_tensor
.
set
(
Y
,
place
)
dy
=
scope
.
new_var
(
"Y@GRAD"
)
dy_tensor
=
dy
.
get_tensor
()
dy_tensor
.
set_dims
([
batch_size
,
class_num
])
dy_tensor
.
alloc_float
(
place
)
dy_tensor
.
set
(
dY
,
place
)
x
=
scope
.
new_var
(
"X"
)
dx
=
scope
.
new_var
(
"X@GRAD"
)
tensor
=
scope
.
find_var
(
"X@GRAD"
).
get_tensor
()
backward_op
.
infer_shape
(
scope
)
self
.
assertEqual
([
batch_size
,
class_num
],
tensor
.
shape
())
ctx
=
core
.
DeviceContext
.
create
(
place
)
backward_op
.
run
(
scope
,
ctx
)
actual
=
np
.
array
(
tensor
)
np
.
testing
.
assert_almost_equal
(
actual
,
expected
,
decimal
=
3
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录