Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
5ee63bb6
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ee63bb6
编写于
11月 17, 2017
作者:
W
wangmeng28
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/develop' into factorization_machine_layer
上级
571ef90c
2e7ffbd1
变更
89
隐藏空白更改
内联
并排
Showing
89 changed file
with
1487 addition
and
3240 deletion
+1487
-3240
CMakeLists.txt
CMakeLists.txt
+17
-11
cmake/configure.cmake
cmake/configure.cmake
+8
-21
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+3
-5
cmake/cuda.cmake
cmake/cuda.cmake
+188
-0
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+7
-7
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+6
-7
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+4
-0
cmake/flags.cmake
cmake/flags.cmake
+0
-55
cmake/util.cmake
cmake/util.cmake
+2
-2
doc/api/v2/config/layer.rst
doc/api/v2/config/layer.rst
+10
-0
doc/design/mkldnn/README.MD
doc/design/mkldnn/README.MD
+4
-4
doc/howto/dev/write_docs_cn.rst
doc/howto/dev/write_docs_cn.rst
+1
-1
doc/mobile/cross_compiling_for_android_cn.md
doc/mobile/cross_compiling_for_android_cn.md
+1
-1
doc/mobile/cross_compiling_for_ios_cn.md
doc/mobile/cross_compiling_for_ios_cn.md
+6
-6
doc/mobile/cross_compiling_for_raspberry_cn.md
doc/mobile/cross_compiling_for_raspberry_cn.md
+1
-1
paddle/cuda/include/hl_gpu.h
paddle/cuda/include/hl_gpu.h
+2
-0
paddle/gserver/CMakeLists.txt
paddle/gserver/CMakeLists.txt
+0
-1
paddle/gserver/dataproviders/DataProvider.cpp
paddle/gserver/dataproviders/DataProvider.cpp
+1
-3
paddle/gserver/dataproviders/ProtoDataProvider.cpp
paddle/gserver/dataproviders/ProtoDataProvider.cpp
+0
-932
paddle/gserver/dataproviders/ProtoDataProvider.h
paddle/gserver/dataproviders/ProtoDataProvider.h
+0
-179
paddle/gserver/layers/DotProdLayer.cpp
paddle/gserver/layers/DotProdLayer.cpp
+97
-0
paddle/gserver/layers/MKLDNNConcatLayer.cpp
paddle/gserver/layers/MKLDNNConcatLayer.cpp
+202
-0
paddle/gserver/layers/MKLDNNConcatLayer.h
paddle/gserver/layers/MKLDNNConcatLayer.h
+129
-0
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+7
-4
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+4
-1
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+22
-12
paddle/gserver/tests/MKLDNNTester.h
paddle/gserver/tests/MKLDNNTester.h
+1
-1
paddle/gserver/tests/proto_files.txt
paddle/gserver/tests/proto_files.txt
+0
-2
paddle/gserver/tests/proto_files_compressed.txt
paddle/gserver/tests/proto_files_compressed.txt
+0
-2
paddle/gserver/tests/sequence_lstm.conf
paddle/gserver/tests/sequence_lstm.conf
+64
-0
paddle/gserver/tests/sequence_recurrent.py
paddle/gserver/tests/sequence_recurrent.py
+56
-0
paddle/gserver/tests/sequence_recurrent_group.py
paddle/gserver/tests/sequence_recurrent_group.py
+70
-0
paddle/gserver/tests/test_CompareSparse.cpp
paddle/gserver/tests/test_CompareSparse.cpp
+1
-2
paddle/gserver/tests/test_CompareTwoNets.cpp
paddle/gserver/tests/test_CompareTwoNets.cpp
+7
-4
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+15
-0
paddle/gserver/tests/test_MKLDNN.cpp
paddle/gserver/tests/test_MKLDNN.cpp
+41
-0
paddle/gserver/tests/test_ProtoDataProvider.cpp
paddle/gserver/tests/test_ProtoDataProvider.cpp
+0
-732
paddle/math/Storage.cpp
paddle/math/Storage.cpp
+4
-0
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+20
-13
paddle/operators/conv_cudnn_op.cu.cc
paddle/operators/conv_cudnn_op.cu.cc
+4
-6
paddle/operators/conv_op.cc
paddle/operators/conv_op.cc
+8
-4
paddle/operators/conv_op.cu.cc
paddle/operators/conv_op.cu.cc
+8
-4
paddle/operators/conv_transpose_cudnn_op.cc
paddle/operators/conv_transpose_cudnn_op.cc
+29
-1
paddle/operators/conv_transpose_cudnn_op.cu.cc
paddle/operators/conv_transpose_cudnn_op.cu.cc
+20
-11
paddle/operators/conv_transpose_op.cc
paddle/operators/conv_transpose_op.cc
+9
-10
paddle/operators/conv_transpose_op.cu.cc
paddle/operators/conv_transpose_op.cu.cc
+8
-4
paddle/operators/conv_transpose_op.h
paddle/operators/conv_transpose_op.h
+2
-4
paddle/operators/gru_op.h
paddle/operators/gru_op.h
+40
-12
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+2
-2
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+2
-2
paddle/operators/pool_cudnn_op.cu.cc
paddle/operators/pool_cudnn_op.cu.cc
+1
-2
paddle/parameter/ParameterUpdateFunctions.cpp
paddle/parameter/ParameterUpdateFunctions.cpp
+1
-1
paddle/platform/cudnn_helper.h
paddle/platform/cudnn_helper.h
+10
-5
paddle/scripts/docker/README.md
paddle/scripts/docker/README.md
+1
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-4
paddle/scripts/submit_local.sh.in
paddle/scripts/submit_local.sh.in
+5
-5
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+1
-1
paddle/trainer/Trainer.cpp
paddle/trainer/Trainer.cpp
+4
-0
paddle/trainer/tests/CMakeLists.txt
paddle/trainer/tests/CMakeLists.txt
+0
-28
paddle/trainer/tests/mnist.list
paddle/trainer/tests/mnist.list
+0
-1
paddle/trainer/tests/mnist_bin_part
paddle/trainer/tests/mnist_bin_part
+0
-0
paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
...vider_wrapper_dir/test_pydata_provider_wrapper.proto_data
+0
-0
paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist
...ovider_wrapper_dir/test_pydata_provider_wrapper.protolist
+0
-1
paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
...e/trainer/tests/sample_trainer_config_compare_sparse.conf
+0
-154
paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
+0
-154
paddle/trainer/tests/sample_trainer_config_rnn.conf
paddle/trainer/tests/sample_trainer_config_rnn.conf
+0
-180
paddle/trainer/tests/testPyDataWrapper.py
paddle/trainer/tests/testPyDataWrapper.py
+0
-24
paddle/trainer/tests/test_CompareTwoOpts.cpp
paddle/trainer/tests/test_CompareTwoOpts.cpp
+0
-184
paddle/trainer/tests/test_PyDataProviderWrapper.cpp
paddle/trainer/tests/test_PyDataProviderWrapper.cpp
+0
-96
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+27
-4
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+41
-0
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
.../paddle/trainer_config_helpers/tests/configs/file_list.sh
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr
...lpers/tests/configs/protostr/test_dot_prod_layer.protostr
+38
-0
python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
...ainer_config_helpers/tests/configs/test_dot_prod_layer.py
+7
-0
python/paddle/v2/fluid/framework.py
python/paddle/v2/fluid/framework.py
+25
-20
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+9
-20
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
...le/v2/fluid/tests/book/test_image_classification_train.py
+20
-80
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
.../paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+9
-20
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
...n/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+10
-25
python/paddle/v2/fluid/tests/book/test_recommender_system.py
python/paddle/v2/fluid/tests/book/test_recommender_system.py
+27
-72
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
...dle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+5
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
...luid/tests/book/test_understand_sentiment_dynamic_lstm.py
+4
-6
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
...dle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+4
-5
python/paddle/v2/fluid/tests/book/test_word2vec.py
python/paddle/v2/fluid/tests/book/test_word2vec.py
+13
-36
python/paddle/v2/fluid/tests/test_conv2d_op.py
python/paddle/v2/fluid/tests/test_conv2d_op.py
+28
-12
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+21
-5
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+29
-8
python/paddle/v2/fluid/tests/test_gru_op.py
python/paddle/v2/fluid/tests/test_gru_op.py
+9
-7
python/paddle/v2/fluid/tests/test_is_empty_op.py
python/paddle/v2/fluid/tests/test_is_empty_op.py
+2
-2
未找到文件。
CMakeLists.txt
浏览文件 @
5ee63bb6
...
...
@@ -36,8 +36,7 @@ include(simd)
################################ Configurations #######################################
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_MKLDNN
"Compile PaddlePaddle with mkl-dnn support."
${
AVX_FOUND
}
)
option
(
WITH_MKLML
"Compile PaddlePaddle with mklml package."
${
AVX_FOUND
}
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_DSO
"Compile PaddlePaddle with dynamic linked CUDA"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
ON
)
option
(
WITH_SWIG_PY
"Compile PaddlePaddle with inference api"
ON
)
...
...
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
"Disable PYTHON when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS"
FORCE
)
# Compile PaddlePaddle mobile inference library
if
(
NOT WITH_C_API
)
...
...
@@ -111,6 +108,14 @@ else()
set
(
THIRD_PARTY_BUILD_TYPE Release
)
endif
()
set
(
WITH_MKLML
${
WITH_MKL
}
)
if
(
WITH_MKL AND
${
AVX2_FOUND
}
)
set
(
WITH_MKLDNN ON
)
else
()
message
(
STATUS
"Do not have AVX2 intrinsics and disabled MKL-DNN"
)
set
(
WITH_MKLDNN OFF
)
endif
()
########################################################################################
include
(
external/mklml
)
# download mklml package
...
...
@@ -158,14 +163,15 @@ set(EXTERNAL_LIBS
)
if
(
WITH_GPU
)
list
(
APPEND EXTERNAL_LIBS
${
CUDA_LIBRARIES
}
${
CUDA_rt_LIBRARY
}
)
if
(
NOT WITH_DSO
)
list
(
APPEND EXTERNAL_LIBS
${
CUDNN_LIBRARY
}
${
CUDA_CUBLAS_LIBRARIES
}
${
CUDA_curand_LIBRARY
}
${
NCCL_LIBRARY
}
)
endif
(
NOT WITH_DSO
)
include
(
cuda
)
endif
(
WITH_GPU
)
if
(
WITH_MKLML
)
list
(
APPEND EXTERNAL_LIBS
${
MKLML_IOMP_LIB
}
)
endif
()
if
(
WITH_MKLDNN
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
${
MKLDNN_IOMP_LIB
}
)
list
(
APPEND EXTERNAL_LIBS
${
MKLDNN_LIB
}
)
endif
()
if
(
USE_NNPACK
)
...
...
cmake/configure.cmake
浏览文件 @
5ee63bb6
...
...
@@ -76,27 +76,14 @@ else()
include_directories
(
${
CUDA_TOOLKIT_INCLUDE
}
)
endif
(
NOT WITH_GPU
)
if
(
WITH_MKLDNN
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
if
(
WITH_MKLML AND MKLDNN_IOMP_DIR
)
message
(
STATUS
"Enable Intel OpenMP at
${
MKLDNN_IOMP_DIR
}
"
)
set
(
OPENMP_FLAGS
"-fopenmp"
)
set
(
CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENMP_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENMP_FLAGS
}
"
)
else
()
find_package
(
OpenMP
)
if
(
OPENMP_FOUND
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OpenMP_C_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OpenMP_CXX_FLAGS
}
"
)
else
()
message
(
WARNING
"Can not find OpenMP."
"Some performance features in MKLDNN may not be available"
)
endif
()
endif
()
endif
(
WITH_MKLDNN
)
if
(
WITH_MKLML AND MKLML_IOMP_LIB
)
message
(
STATUS
"Enable Intel OpenMP with
${
MKLML_IOMP_LIB
}
"
)
set
(
OPENMP_FLAGS
"-fopenmp"
)
set
(
CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
${
OPENMP_FLAGS
}
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENMP_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENMP_FLAGS
}
"
)
endif
()
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
SIMD_FLAG
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
SIMD_FLAG
}
"
)
...
...
cmake/cross_compiling/ios.cmake
浏览文件 @
5ee63bb6
...
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"arm64"
)
set
(
IOS_ARCH
"armv7;armv7s;arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
# FIXME(liuyiqun): support "i386;x86_64" future
set
(
IOS_ARCH
"x86_64"
)
set
(
IOS_ARCH
"i386;x86_64"
)
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
...
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibilty is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility
=hidden -fvisibility
-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
...
...
cmake/cuda.cmake
0 → 100644
浏览文件 @
5ee63bb6
if
(
NOT WITH_GPU
)
return
()
endif
()
set
(
paddle_known_gpu_archs
"30 35 50 52 60 61 70"
)
set
(
paddle_known_gpu_archs7
"30 35 50 52"
)
set
(
paddle_known_gpu_archs8
"30 35 50 52 60 61"
)
######################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
# Usage:
# detect_installed_gpus(out_variable)
function
(
detect_installed_gpus out_variable
)
if
(
NOT CUDA_gpu_detect_output
)
set
(
cufile
${
PROJECT_BINARY_DIR
}
/detect_cuda_archs.cu
)
file
(
WRITE
${
cufile
}
""
"#include <cstdio>
\n
"
"int main() {
\n
"
" int count = 0;
\n
"
" if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;
\n
"
" if (count == 0) return -1;
\n
"
" for (int device = 0; device < count; ++device) {
\n
"
" cudaDeviceProp prop;
\n
"
" if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
\n
"
" std::printf(
\"
%d.%d
\"
, prop.major, prop.minor);
\n
"
" }
\n
"
" return 0;
\n
"
"}
\n
"
)
execute_process
(
COMMAND
"
${
CUDA_NVCC_EXECUTABLE
}
"
"-ccbin=
${
CUDA_HOST_COMPILER
}
"
"--run"
"
${
cufile
}
"
WORKING_DIRECTORY
"
${
PROJECT_BINARY_DIR
}
/CMakeFiles/"
RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
)
if
(
nvcc_res EQUAL 0
)
# only keep the last line of nvcc_out
STRING
(
REGEX REPLACE
";"
"
\\\\
;"
nvcc_out
"
${
nvcc_out
}
"
)
STRING
(
REGEX REPLACE
"
\n
"
";"
nvcc_out
"
${
nvcc_out
}
"
)
list
(
GET nvcc_out -1 nvcc_out
)
string
(
REPLACE
"2.1"
"2.1(2.0)"
nvcc_out
"
${
nvcc_out
}
"
)
set
(
CUDA_gpu_detect_output
${
nvcc_out
}
CACHE INTERNAL
"Returned GPU architetures from detect_installed_gpus tool"
FORCE
)
endif
()
endif
()
if
(
NOT CUDA_gpu_detect_output
)
message
(
STATUS
"Automatic GPU detection failed. Building for all known architectures."
)
set
(
${
out_variable
}
${
paddle_known_gpu_archs
}
PARENT_SCOPE
)
else
()
set
(
${
out_variable
}
${
CUDA_gpu_detect_output
}
PARENT_SCOPE
)
endif
()
endfunction
()
########################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
# select_nvcc_arch_flags(out_variable)
function
(
select_nvcc_arch_flags out_variable
)
# List of arch names
set
(
archs_names
"Kepler"
"Maxwell"
"Pascal"
"All"
"Manual"
)
set
(
archs_name_default
"All"
)
if
(
NOT CMAKE_CROSSCOMPILING
)
list
(
APPEND archs_names
"Auto"
)
endif
()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
set
(
CUDA_ARCH_NAME
${
archs_name_default
}
CACHE STRING
"Select target NVIDIA GPU achitecture."
)
set_property
(
CACHE CUDA_ARCH_NAME PROPERTY STRINGS
""
${
archs_names
}
)
mark_as_advanced
(
CUDA_ARCH_NAME
)
# verify CUDA_ARCH_NAME value
if
(
NOT
";
${
archs_names
}
;"
MATCHES
";
${
CUDA_ARCH_NAME
}
;"
)
string
(
REPLACE
";"
", "
archs_names
"
${
archs_names
}
"
)
message
(
FATAL_ERROR
"Only
${
archs_names
}
architeture names are supported."
)
endif
()
if
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Manual"
)
set
(
CUDA_ARCH_BIN
${
paddle_known_gpu_archs
}
CACHE STRING
"Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
)
set
(
CUDA_ARCH_PTX
"50"
CACHE STRING
"Specify 'virtual' PTX architectures to build PTX intermediate code for"
)
mark_as_advanced
(
CUDA_ARCH_BIN CUDA_ARCH_PTX
)
else
()
unset
(
CUDA_ARCH_BIN CACHE
)
unset
(
CUDA_ARCH_PTX CACHE
)
endif
()
if
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Kepler"
)
set
(
cuda_arch_bin
"30 35"
)
elseif
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Maxwell"
)
set
(
cuda_arch_bin
"50"
)
elseif
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Pascal"
)
set
(
cuda_arch_bin
"60 61"
)
elseif
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Volta"
)
set
(
cuda_arch_bin
"70"
)
elseif
(
${
CUDA_ARCH_NAME
}
STREQUAL
"All"
)
set
(
cuda_arch_bin
${
paddle_known_gpu_archs
}
)
elseif
(
${
CUDA_ARCH_NAME
}
STREQUAL
"Auto"
)
detect_installed_gpus
(
cuda_arch_bin
)
else
()
# (${CUDA_ARCH_NAME} STREQUAL "Manual")
set
(
cuda_arch_bin
${
CUDA_ARCH_BIN
}
)
endif
()
# remove dots and convert to lists
string
(
REGEX REPLACE
"
\\
."
""
cuda_arch_bin
"
${
cuda_arch_bin
}
"
)
string
(
REGEX REPLACE
"
\\
."
""
cuda_arch_ptx
"
${
CUDA_ARCH_PTX
}
"
)
string
(
REGEX MATCHALL
"[0-9()]+"
cuda_arch_bin
"
${
cuda_arch_bin
}
"
)
string
(
REGEX MATCHALL
"[0-9]+"
cuda_arch_ptx
"
${
cuda_arch_ptx
}
"
)
list
(
REMOVE_DUPLICATES cuda_arch_bin
)
list
(
REMOVE_DUPLICATES cuda_arch_ptx
)
set
(
nvcc_flags
""
)
set
(
nvcc_archs_readable
""
)
# Tell NVCC to add binaries for the specified GPUs
foreach
(
arch
${
cuda_arch_bin
}
)
if
(
arch MATCHES
"([0-9]+)
\\
(([0-9]+)
\\
)"
)
# User explicitly specified PTX for the concrete BIN
list
(
APPEND nvcc_flags -gencode arch=compute_
${
CMAKE_MATCH_2
}
,code=sm_
${
CMAKE_MATCH_1
}
)
list
(
APPEND nvcc_archs_readable sm_
${
CMAKE_MATCH_1
}
)
else
()
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
list
(
APPEND nvcc_flags -gencode arch=compute_
${
arch
}
,code=sm_
${
arch
}
)
list
(
APPEND nvcc_archs_readable sm_
${
arch
}
)
endif
()
endforeach
()
# Tell NVCC to add PTX intermediate code for the specified architectures
foreach
(
arch
${
cuda_arch_ptx
}
)
list
(
APPEND nvcc_flags -gencode arch=compute_
${
arch
}
,code=compute_
${
arch
}
)
list
(
APPEND nvcc_archs_readable compute_
${
arch
}
)
endforeach
()
string
(
REPLACE
";"
" "
nvcc_archs_readable
"
${
nvcc_archs_readable
}
"
)
set
(
${
out_variable
}
${
nvcc_flags
}
PARENT_SCOPE
)
set
(
${
out_variable
}
_readable
${
nvcc_archs_readable
}
PARENT_SCOPE
)
endfunction
()
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
if
(
${
CUDA_VERSION
}
LESS 7.0
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs
}
)
elseif
(
${
CUDA_VERSION
}
LESS 8.0
)
# CUDA 7.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs7
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
elseif
(
${
CUDA_VERSION
}
LESS 9.0
)
# CUDA 8.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs8
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
list
(
APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets"
)
endif
()
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
list
(
APPEND EXTERNAL_LIBS
${
CUDA_LIBRARIES
}
${
CUDA_rt_LIBRARY
}
)
if
(
NOT WITH_DSO
)
list
(
APPEND EXTERNAL_LIBS
${
CUDNN_LIBRARY
}
${
CUDA_CUBLAS_LIBRARIES
}
${
CUDA_curand_LIBRARY
}
${
NCCL_LIBRARY
}
)
endif
(
NOT WITH_DSO
)
# setting nvcc arch flags
select_nvcc_arch_flags
(
NVCC_FLAGS_EXTRA
)
list
(
APPEND CUDA_NVCC_FLAGS
${
NVCC_FLAGS_EXTRA
}
)
message
(
STATUS
"Added CUDA NVCC flags for:
${
NVCC_FLAGS_EXTRA_readable
}
"
)
# Set C++11 support
set
(
CUDA_PROPAGATE_HOST_FLAGS OFF
)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
list
(
APPEND CUDA_NVCC_FLAGS
"-std=c++11"
)
list
(
APPEND CUDA_NVCC_FLAGS
"--use_fast_math"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-Xcompiler -fPIC"
)
# Set :expt-relaxed-constexpr to suppress Eigen warnings
list
(
APPEND CUDA_NVCC_FLAGS
"--expt-relaxed-constexpr"
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_DEBUG
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"Release"
)
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELEASE
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"RelWithDebInfo"
)
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELWITHDEBINFO
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"MinSizeRel"
)
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_MINSIZEREL
}
)
endif
()
mark_as_advanced
(
CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD
)
mark_as_advanced
(
CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION
)
cmake/external/mkldnn.cmake
浏览文件 @
5ee63bb6
...
...
@@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
IF
(
${
CBLAS_PROVIDER
}
STREQUAL
"MKLML"
)
SET
(
MKLDNN_DEPENDS
${
MKLML_PROJECT
}
)
SET
(
MKLDNN_MKLROOT
${
MKLML_ROOT
}
)
SET
(
MKLDNN_IOMP_LIB
${
MKLML_IOMP_LIB
}
)
SET
(
MKLDNN_IOMP_DIR
${
MKLML_LIB_DIR
}
)
MESSAGE
(
STATUS
"Build MKLDNN with
${
MKLDNN_MKLROOT
}
"
)
MESSAGE
(
STATUS
"Build MKLDNN with MKLML
${
MKLML_ROOT
}
"
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
ENDIF
()
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
-Wno-error=strict-overflow"
)
...
...
@@ -57,15 +56,16 @@ ExternalProject_Add(
PREFIX
${
MKLDNN_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DMKLROOT=
${
MKL
DNN_MKL
ROOT
}
CMAKE_ARGS -DMKLROOT=
${
MKL
ML_
ROOT
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKL
DNN_MKL
ROOT
}
-DMKLROOT:PATH=
${
MKL
ML_
ROOT
}
)
ADD_LIBRARY
(
mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn
${
MKLDNN_PROJECT
}
)
MESSAGE
(
STATUS
"Mkldnn library:
${
MKLDNN_LIB
}
"
)
MESSAGE
(
STATUS
"MKLDNN library:
${
MKLDNN_LIB
}
"
)
add_definitions
(
-DPADDLE_USE_MKLDNN
)
LIST
(
APPEND external_project_dependencies mkldnn
)
cmake/external/openblas.cmake
浏览文件 @
5ee63bb6
...
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead."
)
ENDIF
()
ELSEIF
(
RPI
)
# use hardfp
...
...
cmake/external/warpctc.cmake
浏览文件 @
5ee63bb6
...
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
IF
(
MOBILE_INFERENCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
...
...
cmake/flags.cmake
浏览文件 @
5ee63bb6
...
...
@@ -149,58 +149,3 @@ endforeach()
foreach
(
flag
${
GPU_COMMON_FLAGS
}
)
safe_set_nvflag
(
${
flag
}
)
endforeach
()
set
(
CUDA_PROPAGATE_HOST_FLAGS OFF
)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
LIST
(
APPEND CUDA_NVCC_FLAGS -std=c++11
)
LIST
(
APPEND CUDA_NVCC_FLAGS --use_fast_math
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
LIST
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_DEBUG
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"Release"
)
LIST
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELEASE
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"RelWithDebInfo"
)
LIST
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELWITHDEBINFO
}
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"MinSizeRel"
)
LIST
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_MINSIZEREL
}
)
endif
()
function
(
specify_cuda_arch cuda_version cuda_arch
)
if
(
${
cuda_version
}
VERSION_GREATER
"8.0"
)
foreach
(
capability 61 62
)
if
(
${
cuda_arch
}
STREQUAL
${
capability
}
)
list
(
APPEND __arch_flags
" -gencode arch=compute_
${
cuda_arch
}
,code=sm_
${
cuda_arch
}
"
)
endif
()
endforeach
()
elseif
(
${
cuda_version
}
VERSION_GREATER
"7.0"
and
${
cuda_arch
}
STREQUAL
"53"
)
list
(
APPEND __arch_flags
" -gencode arch=compute_
${
cuda_arch
}
,code=sm_
${
cuda_arch
}
"
)
endif
()
endfunction
()
# Common gpu architectures: Kepler, Maxwell
foreach
(
capability 30 35 50
)
list
(
APPEND __arch_flags
" -gencode arch=compute_
${
capability
}
,code=sm_
${
capability
}
"
)
endforeach
()
if
(
CUDA_VERSION VERSION_GREATER
"7.0"
OR CUDA_VERSION VERSION_EQUAL
"7.0"
)
list
(
APPEND __arch_flags
" -gencode arch=compute_52,code=sm_52"
)
endif
()
# Modern gpu architectures: Pascal
if
(
CUDA_VERSION VERSION_GREATER
"8.0"
OR CUDA_VERSION VERSION_EQUAL
"8.0"
)
list
(
APPEND __arch_flags
" -gencode arch=compute_60,code=sm_60"
)
list
(
APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr
)
endif
()
# Custom gpu architecture
set
(
CUDA_ARCH
)
if
(
CUDA_ARCH
)
specify_cuda_arch
(
${
CUDA_VERSION
}
${
CUDA_ARCH
}
)
endif
()
set
(
CUDA_NVCC_FLAGS
${
__arch_flags
}
${
CUDA_NVCC_FLAGS
}
)
cmake/util.cmake
浏览文件 @
5ee63bb6
...
...
@@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
target_link_libraries
(
${
TARGET_NAME
}
log
)
endif
(
ANDROID
)
if
(
WITH_MKL
DNN AND WITH_MKLML AND MKLDNN_IOMP_DIR
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
DNN_IOMP
_DIR
}
-liomp5 -Wl,--as-needed"
)
if
(
WITH_MKL
ML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKL
ML_LIB
_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
()
add_dependencies
(
${
TARGET_NAME
}
${
external_project_dependencies
}
)
...
...
doc/api/v2/config/layer.rst
浏览文件 @
5ee63bb6
...
...
@@ -335,6 +335,16 @@ bilinear_interp
.. autoclass:: paddle.v2.layer.bilinear_interp
:noindex:
dot_prod
---------
.. autoclass:: paddle.v2.layer.dot_prod
:noindex:
out_prod
--------
.. autoclass:: paddle.v2.layer.out_prod
:noindex:
power
-----
.. autoclass:: paddle.v2.layer.power
...
...
doc/design/mkldnn/README.MD
浏览文件 @
5ee63bb6
...
...
@@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
我们把集成方案大致分为了如下几个方面。
### CMake
我们会在
`CMakeLists.txt`
中会
添加
`WITH_MKLDNN`
的选项,当设置这个值为
`ON`
的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能
。
我们会在
`CMakeLists.txt`
中会
给用户添加一个
`WITH_MKL`
的开关,他是负责
`WITH_MKLML`
和
`WITH_MKLDNN`
的总开关
。
同时,我们会引入
`WITH_MKLML`
选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性
能。
当打开
`WITH_MKL`
时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功
能。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
当关闭
`WITH_MKL`
时,MKLML和MKL-DNN功能会同时关闭
。
**备注**
:当
`WITH_MKLML=ON`
的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动
`cmake/cblas.cmake`
中的逻辑
。
所以,我们会在
`cmake/external`
目录新建
`mkldnn.cmake`
和
`mklml.cmake`
文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
。
### Layers
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
...
...
doc/howto/dev/write_docs_cn.rst
浏览文件 @
5ee63bb6
...
...
@@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。
cd TO_YOUR_PADDLE_CLONE_PATH
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL
DNN=OFF -DWITH_MKLML
=OFF -DWITH_DOC=ON
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make gen_proto_py
make paddle_docs paddle_docs_cn
...
...
doc/mobile/cross_compiling_for_android_cn.md
浏览文件 @
5ee63bb6
#
构建Android平台上的PaddlePaddle库
#
Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
-
基于Docker容器的编译方式
...
...
doc/mobile/cross_compiling_for_ios_cn.md
浏览文件 @
5ee63bb6
#
构建iOS平台上的PaddlePaddle库
#
iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
...
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示:
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示
,默认编译所有架构
:
<table class="docutils">
<colgroup>
...
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64
(默认)
</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64
(默认)
</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
...
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```
bash
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_ARCH
=
"arm64"
\
-DIOS_ARCH
=
"arm
v7;arm
64"
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
...
...
@@ -112,6 +112,6 @@ $ make install
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`third_party`
目录,其中包含所依赖的所有第三方库
注意,
不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用
`lipo`
工具将多个静态库合并成一个支持多个架构的
fat库。
注意,
如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用
`lipo`
工具合并
fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
doc/mobile/cross_compiling_for_raspberry_cn.md
浏览文件 @
5ee63bb6
#
构建Raspberry Pi平台上的PaddlePaddle库
#
Raspberry Pi平台编译指南
通常有两个方法来构建基于 Rasspberry Pi 的版本:
...
...
paddle/cuda/include/hl_gpu.h
浏览文件 @
5ee63bb6
...
...
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
...
...
paddle/gserver/CMakeLists.txt
浏览文件 @
5ee63bb6
...
...
@@ -73,7 +73,6 @@ if(MOBILE_INFERENCE)
list
(
REMOVE_ITEM GSERVER_SOURCES
dataproviders/DataProvider.cpp
dataproviders/MultiDataProvider.cpp
dataproviders/ProtoDataProvider.cpp
dataproviders/PyDataProvider2.cpp
dataproviders/PyDataProvider.cpp
)
...
...
paddle/gserver/dataproviders/DataProvider.cpp
浏览文件 @
5ee63bb6
...
...
@@ -16,8 +16,8 @@ limitations under the License. */
#include <unistd.h>
#include <algorithm>
#include "ProtoDataProvider.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
...
...
@@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config,
REGISTER_DATA_PROVIDER
(
simple
,
SimpleDataProvider
);
REGISTER_DATA_PROVIDER
(
dummy
,
DummyDataProvider
);
REGISTER_DATA_PROVIDER
(
proto
,
ProtoDataProvider
);
REGISTER_DATA_PROVIDER
(
proto_sequence
,
ProtoSequenceDataProvider
);
int64_t
DataProvider
::
getNextBatch
(
int64_t
size
,
DataBatch
*
batch
)
{
int64_t
batchSize
=
doubleBuffer_
?
getNextBatchFromBuffer
(
size
,
batch
)
...
...
paddle/gserver/dataproviders/ProtoDataProvider.cpp
已删除
100644 → 0
浏览文件 @
571ef90c
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ProtoDataProvider.h"
#include <algorithm>
#include <fstream>
#include <istream>
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#include "DataProviderGroup.h"
#include "paddle/utils/Logging.h"
// Command-line flag consulted by ProtoDataProvider::loadData(): when its value
// is below 1.0, loading of further files stops as soon as getMemoryUsage()
// reports a value above this threshold. The default of 1.0 disables the check.
DEFINE_double(memory_threshold_on_load_data,
              1.0,
              "stop loading data when memory is not sufficient");
namespace
paddle
{
// Register the DataProviderGroup wrappers of both proto providers under the
// names "proto_group" and "proto_sequence_group".
// NOTE(review): presumably this makes them creatable by name through
// DataProvider::create(); the macro definition is not visible here.
REGISTER_DATA_PROVIDER(proto_group, DataProviderGroup<ProtoDataProvider>);
REGISTER_DATA_PROVIDER(proto_sequence_group,
                       DataProviderGroup<ProtoSequenceDataProvider>);
/**
 * Construct the provider with zeroed sample and sequence counters.
 *
 * @param config       data configuration forwarded to DataProvider.
 * @param useGpu       forwarded to DataProvider.
 * @param loadDataAll  when true, the data referenced by config_.files() is
 *                     loaded immediately.
 */
ProtoDataProvider::ProtoDataProvider(const DataConfig& config,
                                     bool useGpu,
                                     bool loadDataAll)
    : DataProvider(config, useGpu), sampleNums_(0), currentSequenceIndex_(0) {
  if (!loadDataAll) {
    return;
  }
  loadData(config_.files());
}
/**
 * Load every file in fileList, then build the shuffled sequence id table.
 *
 * Loading stops early when the memory_threshold_on_load_data flag is below
 * 1.0 and the current memory usage exceeds it.
 */
void ProtoDataProvider::loadData(const std::vector<std::string>& fileList) {
  for (auto& path : fileList) {
    if (FLAGS_memory_threshold_on_load_data < 1.0) {
      const double usage = getMemoryUsage();
      if (usage > FLAGS_memory_threshold_on_load_data) {
        LOG(INFO) << "memUsage is " << usage << ", > "
                  << FLAGS_memory_threshold_on_load_data
                  << " therefore SKIP ALL REMAINING file.";
        break;
      }
    }
    LOG(INFO) << "load data file " << path;
    loadDataFile(path);
  }

  if (sequenceStartPositions_.size() != sampleNums_) {
    // Real sequences: terminate the start-position table with the total
    // sample count and number the sequences 0..N-1.
    sequenceStartPositions_.push_back(sampleNums_);
    const size_t numSequences = sequenceStartPositions_.size() - 1;
    shuffledSequenceIds_.reserve(numSequences);
    for (size_t seq = 0; seq < sequenceStartPositions_.size() - 1; ++seq) {
      shuffledSequenceIds_.push_back(seq);
    }
  } else {
    // Every sample is its own sequence: the start positions double as the
    // ids, and sequenceStartPositions_ takes over the other vector's content.
    shuffledSequenceIds_.swap(sequenceStartPositions_);
  }

  LOG(INFO) << "read done, num of instance=" << sampleNums_;
  showDataStats();
}
void
ProtoDataProvider
::
loadData
(
const
std
::
string
&
fileName
)
{
std
::
vector
<
std
::
string
>
fileList
;
loadFileList
(
fileName
,
fileList
);
loadData
(
fileList
);
}
void
ProtoDataProvider
::
checkDataHeader
(
const
DataHeader
&
header
)
{
if
(
header_
.
slot_defs_size
())
{
// header_ is already set. Need to check consistency.
CHECK_EQ
(
header_
.
slot_defs_size
(),
header
.
slot_defs_size
())
<<
"Different header"
;
for
(
int
i
=
0
;
i
<
header
.
slot_defs_size
();
++
i
)
{
CHECK_EQ
(
header_
.
slot_defs
(
i
).
type
(),
header
.
slot_defs
(
i
).
type
());
CHECK_EQ
(
header_
.
slot_defs
(
i
).
dim
(),
header
.
slot_defs
(
i
).
dim
());
}
return
;
}
// header_ is not set before
CHECK
(
header
.
slot_defs_size
())
<<
"Invalid header: no slot is defined"
;
int
i
;
for
(
i
=
0
;
i
<
header
.
slot_defs_size
();
++
i
)
{
if
(
header
.
slot_defs
(
i
).
type
()
==
SlotDef
::
INDEX
||
header
.
slot_defs
(
i
).
type
()
==
SlotDef
::
VAR_MDIM_INDEX
)
{
break
;
}
constexpr
int
kBufLen
=
100
;
char
buf
[
kBufLen
];
snprintf
(
buf
,
kBufLen
,
"slot%d_nnz"
,
i
);
nnzStats_
.
push_back
(
getStat
(
buf
));
}
numVecSlots_
=
i
;
// Check that INDEX slots are after VECTOR slots
for
(
int
i
=
numVecSlots_
;
i
<
header
.
slot_defs_size
();
++
i
)
{
CHECK
(
header
.
slot_defs
(
i
).
type
()
==
SlotDef
::
INDEX
||
header
.
slot_defs
(
i
).
type
()
==
SlotDef
::
VAR_MDIM_INDEX
);
}
slots_
.
clear
();
slots_
.
reserve
(
header
.
slot_defs_size
());
for
(
int
i
=
0
;
i
<
header
.
slot_defs_size
();
++
i
)
{
slots_
.
emplace_back
();
slots_
.
back
().
type
=
header
.
slot_defs
(
i
).
type
();
slots_
.
back
().
dim
=
header
.
slot_defs
(
i
).
dim
();
if
(
SlotDef
::
VECTOR_SPARSE_NON_VALUE
==
header
.
slot_defs
(
i
).
type
()
||
SlotDef
::
VECTOR_SPARSE_VALUE
==
header
.
slot_defs
(
i
).
type
())
{
slots_
.
back
().
indices
.
push_back
(
0
);
}
}
header_
=
header
;
}
/**
 * Check that one sample is consistent with header_.
 *
 * Vector slots are checked per slot type (value/id counts, ids below the slot
 * dim, dims product for VAR_MDIM_DENSE). Index slots must hold ids below the
 * slot dim; the id -1U is accepted for non-VAR_MDIM_INDEX slots.
 * Any violation is fatal through the CHECK macros.
 */
void ProtoDataProvider::checkSample(const DataSample& sample) {
  CHECK_EQ(numVecSlots_, sample.vector_slots_size());
  CHECK(header_.slot_defs_size() == numVecSlots_ + sample.id_slots_size() ||
        header_.slot_defs_size() ==
            numVecSlots_ + sample.var_id_slots_size());

  for (int i = 0; i < numVecSlots_; ++i) {
    const auto& slotDef = header_.slot_defs(i);
    const auto& vec = sample.vector_slots(i);
    uint32_t dim = slotDef.dim();

    switch (slotDef.type()) {
      case SlotDef::VECTOR_DENSE: {
        CHECK_EQ(static_cast<int>(dim), vec.values_size());
        CHECK_EQ(0, vec.ids_size());
        break;
      }
      case SlotDef::VECTOR_SPARSE_NON_VALUE: {
        if (0 == vec.ids_size()) {
          break;
        }
        CHECK_LT(0, vec.ids_size());
        CHECK_EQ(0, vec.values_size());
        auto largestId = *std::max_element(vec.ids().begin(), vec.ids().end());
        CHECK_GT(dim, largestId);
        break;
      }
      case SlotDef::VECTOR_SPARSE_VALUE: {
        if (0 == vec.ids_size()) {
          CHECK_EQ(0, vec.values_size());
          break;
        }
        CHECK_LT(0, vec.values_size());
        CHECK_GE(static_cast<int>(dim), vec.values_size());
        CHECK_EQ(vec.values_size(), vec.ids_size());
        auto largestId = *std::max_element(vec.ids().begin(), vec.ids().end());
        CHECK_GT(dim, largestId);
        break;
      }
      case SlotDef::VAR_MDIM_DENSE: {
        const bool fixedDim = static_cast<int>(dim) != 0;
        if (fixedDim) {
          CHECK_EQ(static_cast<int>(dim), vec.values_size());
        } else {
          // Without a declared dim the sample must carry its own dims.
          CHECK_NE(vec.dims_size(), 0);
        }
        if (vec.dims_size() != 0) {
          int totalDim = vec.dims(0);
          for (int j = 1; j < vec.dims_size(); ++j) {
            totalDim *= vec.dims(j);
          }
          if (fixedDim) {
            CHECK_EQ(static_cast<int>(dim), totalDim);
          } else {
            CHECK_EQ(totalDim, vec.values_size());
          }
        }
        break;
      }
      case SlotDef::STRING: {
        CHECK_EQ(1, vec.strs_size());
        CHECK_EQ(0, vec.ids_size());
        CHECK_EQ(0, vec.values_size());
        break;
      }
      default:
        LOG(FATAL) << "BUG: Should not reach here";
    }
  }

  for (int i = numVecSlots_; i < header_.slot_defs_size(); ++i) {
    const int idSlot = i - numVecSlots_;
    if (header_.slot_defs(i).type() == SlotDef::VAR_MDIM_INDEX) {
      for (int j = 0; j < sample.var_id_slots(idSlot).ids_size(); ++j) {
        uint32_t id = sample.var_id_slots(idSlot).ids(j);
        CHECK_LT(id, header_.slot_defs(i).dim());
      }
      continue;
    }
    uint32_t id = sample.id_slots(idSlot);
    if (id == -1U) continue;  // -1U is exempt from the range check
    CHECK_LT(id, header_.slot_defs(i).dim());
  }
}
/**
 * Load one protobuf data file: a header message followed by sample messages.
 *
 * The header is validated with checkDataHeader(); each sample is validated
 * with checkSample(), stored with fillSlots() and counted in sampleNums_.
 * A sample whose is_beginning() is set records a new sequence start.
 * Files whose name ends in ".gz" are read with compression enabled.
 *
 * @param fileName path of the data file; failure to open or read it is fatal.
 */
void ProtoDataProvider::loadDataFile(const std::string& fileName) {
  // The payload is binary (length-prefixed protobuf, optionally gzipped), so
  // the stream is opened in binary mode to rule out any text-mode
  // translation of the bytes.
  std::ifstream is(fileName, std::ios::in | std::ios::binary);
  CHECK(is) << "Fail to open " << fileName;
  bool dataCompression = str::endsWith(fileName, ".gz");
  std::unique_ptr<ProtoReader> reader(new ProtoReader(&is, dataCompression));
  CHECK(reader) << "Fail to create proto data input stream";

  DataHeader header;
  CHECK(reader->read(&header));
  checkDataHeader(header);

  DataSample sample;
  // Read until the reader reports no further message.
  while (reader->read(&sample)) {
    checkSample(sample);
    if (sample.is_beginning()) {
      sequenceStartPositions_.push_back(sampleNums_);
    }
    fillSlots(sample);
    ++sampleNums_;
  }

  // A failed read before end-of-file means the file was not fully consumed.
  CHECK(is.eof()) << "Fail to read file";

  // Destroy the reader before closing the stream it reads from.
  reader.reset(nullptr);
  is.close();
}
/**
 * Append the content of one sample to the in-memory slots_.
 *
 * The sample is expected to have passed checkSample() already; nothing is
 * validated again here. Vector slots are read from sample.vector_slots(i),
 * index slots from the id slot at offset i - numVecSlots_.
 */
void ProtoDataProvider::fillSlots(const DataSample& sample) {
  for (size_t i = 0; i < slots_.size(); ++i) {
    auto& slot = slots_[i];
    int dim = slot.dim;
    switch (slot.type) {
      case SlotDef::VECTOR_DENSE: {
        const size_t offset = slot.denseData.size();
        slot.denseData.resize(offset + dim);
        const float* src = sample.vector_slots(i).values().data();
#ifdef PADDLE_TYPE_DOUBLE
        std::copy(src, src + dim, slot.denseData.begin() + offset);
#else
        memcpy(slot.denseData.data() + offset, src, sizeof(real) * dim);
#endif
        break;
      }
      case SlotDef::VECTOR_SPARSE_NON_VALUE: {
        const auto& vec = sample.vector_slots(i);
        int idCount = vec.ids_size();

        // Look for a sub-sequence description of this slot; subSeqCount
        // stays 0 when there is none.
        int subSeqCount = 0;
        int subseqIdx = 0;
        while (subseqIdx < sample.subseq_slots_size()) {
          if (sample.subseq_slots(subseqIdx).slot_id() == i) {
            subSeqCount = sample.subseq_slots(subseqIdx).lens_size();
            break;
          }
          subseqIdx++;
        }
        if (subSeqCount && slot.subIndices.size() == 0UL) {
          // First sub-sequence seen: offsets start at 0.
          slot.subIndices.push_back(0);
        }

        if (idCount == 0) {
          // No ids: repeat the previous offset(s).
          slot.indices.push_back(slot.indices.back());
          if (slot.subIndices.size()) {
            slot.subIndices.push_back(slot.subIndices.back());
          }
          break;
        }

        const int64_t base = slot.indices.back();
        slot.sparseNonValueData.resize(base + idCount);
        const unsigned int* srcIds = vec.ids().data();
        memcpy(slot.sparseNonValueData.data() + base,
               srcIds,
               sizeof(*srcIds) * idCount);
        slot.indices.push_back(base + idCount);

        for (int k = 0; k < subSeqCount; ++k) {
          slot.subIndices.push_back(slot.subIndices.back() +
                                    sample.subseq_slots(subseqIdx).lens(k));
        }
        break;
      }
      case SlotDef::VECTOR_SPARSE_VALUE: {
        const auto& vec = sample.vector_slots(i);
        if (0 == vec.ids_size()) {
          slot.indices.push_back(slot.indices.back());
          break;
        }
        int idCount = vec.ids_size();
        const int64_t base = slot.indices.back();
        slot.sparseFloatValueData.resize(base + idCount);
        const unsigned int* srcIds = vec.ids().data();
        const float* srcValues = vec.values().data();
        for (int k = 0; k < idCount; ++k) {
          auto& entry = slot.sparseFloatValueData[base + k];
          entry.col = srcIds[k];
          entry.value = srcValues[k];
        }
        slot.indices.push_back(base + idCount);
        break;
      }
      case SlotDef::INDEX: {
        slot.indexData.push_back(sample.id_slots(i - numVecSlots_));
        break;
      }
      case SlotDef::VAR_MDIM_DENSE: {
        const auto& vec = sample.vector_slots(i);
        slot.varDenseData.emplace_back();
        auto& entry = slot.varDenseData.back();
        size_t valueCount = vec.values_size();
        entry.data.resize(valueCount);
        const float* src = vec.values().data();
#ifdef PADDLE_TYPE_DOUBLE
        std::copy(src, src + valueCount, entry.data.data());
#else
        memcpy(entry.data.data(), src, sizeof(real) * valueCount);
#endif
        entry.dims.resize(vec.dims_size());
        memcpy(entry.dims.data(),
               vec.dims().data(),
               sizeof(uint32_t) * vec.dims_size());
        break;
      }
      case SlotDef::VAR_MDIM_INDEX: {
        const auto& idSlot = sample.var_id_slots(i - numVecSlots_);
        slot.varIndices.emplace_back();
        auto& entry = slot.varIndices.back();
        size_t idCount = idSlot.ids_size();
        entry.resize(idCount);
        memcpy(entry.data(), idSlot.ids().data(), sizeof(uint32_t) * idCount);
        break;
      }
      case SlotDef::STRING: {
        slot.strData.push_back(sample.vector_slots(i).strs(0));
        break;
      }
    }
  }
}
/**
 * Log, for every sparse slot, the average number of stored entries per
 * sample (entry count divided by sampleNums_).
 */
void ProtoDataProvider::showDataStats() {
  std::ostringstream summary;
  for (size_t i = 0; i < slots_.size(); ++i) {
    const auto& slot = slots_[i];
    size_t nnz;
    if (slot.type == SlotDef::VECTOR_SPARSE_NON_VALUE) {
      nnz = slot.sparseNonValueData.size();
    } else if (slot.type == SlotDef::VECTOR_SPARSE_VALUE) {
      nnz = slot.sparseFloatValueData.size();
    } else {
      continue;  // only sparse slots are reported
    }
    summary << "slot" << i << ":avgNNZ="
            << (static_cast<double>(nnz) / sampleNums_) << "; ";
  }
  LOG(INFO) << summary.str();
}
/**
 * Rewind to the first sequence, reshuffle unless skipShuffle_ is set, then
 * reset the DataProvider base.
 */
void ProtoDataProvider::reset() {
  currentSequenceIndex_ = 0;
  const bool shuffleRequested = !skipShuffle_;
  if (shuffleRequested) {
    shuffle();
  }
  DataProvider::reset();
}
/**
 * Randomly permute shuffledSequenceIds_ using the engine returned by
 * ThreadLocalRandomEngine::get().
 */
void ProtoDataProvider::shuffle() {
  auto&& engine = ThreadLocalRandomEngine::get();
  std::shuffle(
      shuffledSequenceIds_.begin(), shuffledSequenceIds_.end(), engine);
}
/*
  Walk the shuffled sequences starting at currentSequenceIndex_ and call
  op(begin, end) with the sample range [begin, end) of each one.

  The walk stops before a sequence that would push the running sample total
  above size, unless nothing has been taken yet. When usageRatio_ < 1 only
  that fraction of the sequences is eligible.

  Returns how far the walk advanced past currentSequenceIndex_.
*/
template <class Op>
int64_t ProtoDataProvider::sequenceLoop(Op op, int64_t size) {
  size_t limit = shuffledSequenceIds_.size();
  if (usageRatio_ < 1.0f) {
    limit = static_cast<int64_t>(limit * usageRatio_);
  }

  int64_t taken = 0;
  size_t cursor = currentSequenceIndex_;
  while (cursor < limit) {
    size_t seqId = shuffledSequenceIds_[cursor];
    int64_t first = sequenceStartPositions_[seqId];
    int64_t last = sequenceStartPositions_[seqId + 1];
    int64_t length = last - first;
    if (taken + length > size && taken > 0) {
      break;
    }
    taken += length;
    op(first, last);
    ++cursor;
  }
  return cursor - currentSequenceIndex_;
}
/*
  Call op(pos) for the samples of the next batch, starting at
  currentSequenceIndex_ and covering at most size samples.

  For non-iid data this delegates to sequenceLoop() and visits every position
  of each selected sequence; the return value is sequenceLoop()'s result.
  For iid data each shuffled id is itself a position, and the (clamped)
  number of visited samples is returned.
*/
template <class Op>
int64_t ProtoDataProvider::sampleLoop(Op op, int64_t size) {
  if (!iidData()) {
    auto visitRange = [op](int64_t begin, int64_t end) {
      for (int64_t p = begin; p < end; ++p) {
        op(p);
      }
    };
    return sequenceLoop(visitRange, size);
  }

  size = std::min<int64_t>(sampleNums_ - currentSequenceIndex_, size);
  const int64_t stop = currentSequenceIndex_ + size;
  for (int64_t idx = currentSequenceIndex_; idx < stop; ++idx) {
    size_t pos = shuffledSequenceIds_[idx];
    op(pos);
  }
  return size;
}
/*
  For iid data only: for each of the next (at most size) samples, locate the
  range of entries in slots_[slot].subIndices that belongs to the sample and
  call op(j) for every index j in that range.

  Returns the total number of sub-sequence indices passed to op.
*/
template <class Op>
int64_t ProtoDataProvider::subSampleLoop(Op op, int64_t size, int slot) {
  CHECK(iidData()) << "subSampleLoop only accepts iid data";
  size = std::min<int64_t>(sampleNums_ - currentSequenceIndex_, size);

  int visited = 0;
  const int64_t stop = currentSequenceIndex_ + size;
  for (int64_t idx = currentSequenceIndex_; idx < stop; ++idx) {
    size_t pos = shuffledSequenceIds_[idx];
    int64_t* offsets = slots_[slot].indices.data();
    int64_t* subOffsets = slots_[slot].subIndices.data();

    int64_t rangeBegin = 0;
    int64_t rangeEnd = 0;
    const int subCount = static_cast<int>(slots_[slot].subIndices.size());
    for (int j = 0; j < subCount; j++) {
      if (subOffsets[j] == offsets[pos]) {
        rangeBegin = j;
        // NOTE(review): this compares subOffsets at index pos, not j or the
        // sample offsets; kept exactly as in the original - confirm intent.
        if (subOffsets[pos] == subOffsets[pos + 1]) {
          rangeEnd = j + 1;
          break;
        }
      } else if (subOffsets[j] == offsets[pos + 1]) {
        rangeEnd = j;
        break;
      }
    }

    for (int j = rangeBegin; j < rangeEnd; j++) {
      op(j);
    }
    visited += rangeEnd - rangeBegin;
  }
  return visited;
}
/**
 * Assemble the next batch of at most `size` samples into *batch.
 *
 * Holds lock_ for the whole call. The batch is first built in cpuBatch_, one
 * Argument per header slot; when useGpu_ is set it is then transferred to
 * gpuBatch_ on HPPL_STREAM_1. Afterwards currentSequenceIndex_ advances by
 * the number of sequences scanned.
 *
 * @param size   requested number of samples; for non-iid data it is replaced
 *               by the total length of the sequences actually selected.
 * @param batch  output batch, assigned from the CPU or GPU batch.
 * @return batch->getSize(), or 0 when no sample is available.
 */
int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
                                                DataBatch* batch) {
  int64_t numSequences = 0;  // actual number of sequences in the batch

  // the number of sequences scanned, including those skipped because too long
  int64_t numScannedSeqs = 0;
  std::lock_guard<RWLock> guard(lock_);
  if (iidData()) {
    // One sample per sequence: clamp the request to what is left.
    size = std::min<int64_t>(getSize() - currentSequenceIndex_, size);
    numScannedSeqs = numSequences = size;
  } else {
    // First pass over the sequences: count them and sum their lengths.
    int64_t sz = 0;
    auto op = [&sz, &numSequences](int64_t begin, int64_t end) {
      ++numSequences;
      sz += end - begin;
    };
    numScannedSeqs = sequenceLoop(op, size);
    VLOG_IF(1, numScannedSeqs > numSequences)
        << numScannedSeqs - numSequences
        << " sequences are skipped because longer than " << size;
    size = sz;
  }
  if (size <= 0) return 0;

  DataBatch& cpuBatch = *cpuBatch_;
  std::vector<Argument>& cpuArguments = cpuBatch.getStreams();
  cpuBatch.setSize(size);
  cpuArguments.resize(header_.slot_defs_size());

  if (!iidData()) {
    // Second pass: write the batch-relative start offset of every selected
    // sequence into slot 0, then share that vector with all other slots.
    ICpuGpuVector::resizeOrCreate(cpuArguments[0].sequenceStartPositions,
                                  numSequences + 1,
                                  /* useGpu= */ false);
    int* buf = cpuArguments[0].sequenceStartPositions->getMutableData(false);
    int pos = 0;
    int i = 0;
    auto op = [buf, &pos, &i](int64_t begin, int64_t end) {
      buf[i] = pos;
      pos += end - begin;
      ++i;
    };
    sequenceLoop(op, size);
    buf[i] = size;  // closing offset
    for (size_t slot = 1; slot < cpuArguments.size(); ++slot) {
      cpuArguments[slot].sequenceStartPositions =
          cpuArguments[0].sequenceStartPositions;
    }
  }

  for (int slot = 0; slot < header_.slot_defs_size(); ++slot) {
    size_t dim = header_.slot_defs(slot).dim();
    SlotDef::SlotType slotType = header_.slot_defs(slot).type();

    // Collect the positions (in the loaded data) of the samples of this batch.
    std::vector<int64_t> dataPos;
    dataPos.reserve(size);
    auto op = [this, &dataPos](int64_t pos) { dataPos.push_back(pos); };
    sampleLoop(op, size);

    switch (slotType) {
      case SlotDef::VECTOR_DENSE: {
        Matrix::resizeOrCreate(cpuArguments[slot].value,
                               size,
                               dim,
                               false,   // trans = false
                               false);  // useGpu = false
        real* buf = cpuArguments[slot].value->getData();
        // Copy one row of `dim` values per selected sample.
        for (int i = 0; i < size; ++i) {
          memcpy(buf + i * dim,
                 slots_[slot].denseData.data() + dataPos[i] * dim,
                 sizeof(real) * dim);
        }
        break;
      }
      case SlotDef::VECTOR_SPARSE_NON_VALUE: {
        if (!(cpuArguments[slot].value)) {
          cpuArguments[slot].value =
              Matrix::createSparseMatrix(size,
                                         dim,
                                         size /*DEFAULT_AVG_WIDTH = 1*/,
                                         NO_VALUE,
                                         SPARSE_CSR,
                                         false,
                                         useGpu_);
        }
        auto mat = cpuArguments[slot].value;
        mat->resize(size, dim);
        // Dispatch on the concrete sparse matrix type; only the GPU variant
        // takes a stream argument.
        if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
          std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
              dataPos.data(),
              slots_[slot].indices.data(),
              slots_[slot].sparseNonValueData.data(),
              HPPL_STREAM_1);
        } else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
          std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
              dataPos.data(),
              slots_[slot].indices.data(),
              slots_[slot].sparseNonValueData.data());
        } else {
          LOG(FATAL) << "Not Supported";
        }
        // Record the number of stored ids in this batch for the slot's stat.
        size_t numElements = 0;
        for (auto pos : dataPos) {
          numElements +=
              slots_[slot].indices[pos + 1] - slots_[slot].indices[pos];
        }
        nnzStats_[slot]->addSample(numElements);

        break;
      }
      case SlotDef::VECTOR_SPARSE_VALUE: {
        if (!(cpuArguments[slot].value)) {
          cpuArguments[slot].value =
              Matrix::createSparseMatrix(size,
                                         dim,
                                         size /*DEFAULT_AVG_WIDTH = 1*/,
                                         FLOAT_VALUE,
                                         SPARSE_CSR,
                                         false,
                                         useGpu_);
        }
        auto mat = cpuArguments[slot].value;
        mat->resize(size, dim);
        if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
          std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
              dataPos.data(),
              slots_[slot].indices.data(),
              slots_[slot].sparseFloatValueData.data(),
              HPPL_STREAM_1);
        } else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
          std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
              dataPos.data(),
              slots_[slot].indices.data(),
              slots_[slot].sparseFloatValueData.data());
        } else {
          LOG(FATAL) << "Not Supported";
        }
        break;
      }
      case SlotDef::INDEX: {
        IVector::resizeOrCreate(cpuArguments[slot].ids,
                                size,
                                /* useGpu= */ false);
        int* buf = cpuArguments[slot].ids->getData();
        for (int i = 0; i < size; ++i) {
          buf[i] = slots_[slot].indexData[dataPos[i]];
        }
        break;
      }
      case SlotDef::VAR_MDIM_DENSE: {
        // Variable-sized samples are only supported one per batch.
        CHECK_EQ(size, 1);
        auto mat = cpuArguments[slot].value;
        size_t totalDim = slots_[slot].varDenseData[dataPos[0]].data.size();

        CHECK_EQ(slots_[slot].varDenseData[dataPos[0]].dims.size(), size_t(3));
        size_t height, width, depth, oldWidth;
        /* dims[2] is depth, will be changed to dims[0] in future */
        depth = slots_[slot].varDenseData[dataPos[0]].dims[2];
        height = slots_[slot].varDenseData[dataPos[0]].dims[1];
        width = slots_[slot].varDenseData[dataPos[0]].dims[0];
        oldWidth = width;
        /* process the undesirable sample */
        // A sample narrower than it is tall is widened to width == height;
        // the extra columns stay zero (see zeroMem() below).
        if (oldWidth < height) {
          width = height;
        }
        cpuArguments[slot].setFrameHeight(height);
        cpuArguments[slot].setFrameWidth(width);

        if (oldWidth < height) {
          totalDim = width * height * depth;
        }
        Matrix::resizeOrCreate(cpuArguments[slot].value,
                               size,
                               totalDim,
                               false,   // trans = false
                               false);  // useGpu = false
        real* buf = cpuArguments[slot].value->getData();
        cpuArguments[slot].value->zeroMem();
        if (oldWidth < height) {
          // Copy row by row because source and destination row strides
          // differ (oldWidth vs. width).
          real* srcBuf = slots_[slot].varDenseData[dataPos[0]].data.data();
          for (size_t i = 0; i < depth; i++) {
            for (size_t j = 0; j < height; j++) {
              for (size_t k = 0; k < oldWidth; k++) {
                buf[i * height * width + j * width + k] =
                    srcBuf[i * height * oldWidth + j * oldWidth + k];
              }
            }
          }
        } else {
          memcpy(buf,
                 slots_[slot].varDenseData[dataPos[0]].data.data(),
                 sizeof(real) * totalDim);
        }
        ICpuGpuVector::resizeOrCreate(
            cpuArguments[slot].sequenceStartPositions,
            size + 1, /* size == 1 currently */
            /* useGpu= */ false);
        int* bufStarts =
            cpuArguments[slot].sequenceStartPositions->getMutableData(false);
        bufStarts[0] = 0;
        bufStarts[1] = 1;
        break;
      }
      case SlotDef::VAR_MDIM_INDEX: {
        // Variable-sized samples are only supported one per batch.
        CHECK_EQ(size, 1);
        size_t totalDim = slots_[slot].varIndices[dataPos[0]].size();
        IVector::resizeOrCreate(cpuArguments[slot].ids,
                                totalDim,
                                /* useGpu= */ false);
        int* buf = cpuArguments[slot].ids->getData();
        memcpy(buf,
               slots_[slot].varIndices[dataPos[0]].data(),
               sizeof(int) * totalDim);

        ICpuGpuVector::resizeOrCreate(
            cpuArguments[slot].sequenceStartPositions,
            size + 1, /* size == 1 currently */
            /* useGpu= */ false);
        int* bufStarts =
            cpuArguments[slot].sequenceStartPositions->getMutableData(false);
        bufStarts[0] = 0;
        /* we expand the convolutional feature map to a sequence data,
         * so there should be a corresponding sequence labels */
        bufStarts[1] = totalDim;
        break;
      }
      case SlotDef::STRING: {
        if (cpuArguments[slot].strs) {
          cpuArguments[slot].strs->resize(size);
        } else {
          cpuArguments[slot].strs =
              std::make_shared<std::vector<std::string>>(size);
        }
        for (int i = 0; i < size; ++i) {
          (*cpuArguments[slot].strs)[i] = slots_[slot].strData[dataPos[i]];
        }
        break;
      }
    }
  }

  if (useGpu_) {
    std::vector<Argument>& cpuArguments = cpuBatch.getStreams();
    DataBatch& gpuBatch = *gpuBatch_;
    std::vector<Argument>& gpuArguments = gpuBatch.getStreams();
    gpuArguments.resize(cpuArguments.size());
    gpuBatch.setSize(size);
    for (int i = 0; i < header_.slot_defs_size(); ++i) {
      SlotDef::SlotType slotType = header_.slot_defs(i).type();
      if (SlotDef::VECTOR_SPARSE_VALUE == slotType ||
          SlotDef::VECTOR_SPARSE_NON_VALUE == slotType) {
        // Sparse slots were created with useGpu_ above, so the Argument is
        // assigned as is instead of being copied again.
        gpuArguments[i] = cpuArguments[i];
        gpuArguments[i].sequenceStartPositions =
            cpuArguments[i].sequenceStartPositions;
      } else {
        gpuArguments[i].resizeAndCopyFrom(
            cpuArguments[i], useGpu_, HPPL_STREAM_1);
      }
    }
    // Wait for the copies issued on HPPL_STREAM_1 before publishing.
    hl_stream_synchronize(HPPL_STREAM_1);
    *batch = gpuBatch;
  } else {
    *batch = cpuBatch;
  }

  currentSequenceIndex_ += numScannedSeqs;

  return batch->getSize();
}
/**
 * Construct the sequence provider; all arguments are forwarded unchanged to
 * the ProtoDataProvider constructor.
 */
ProtoSequenceDataProvider::ProtoSequenceDataProvider(const DataConfig& config,
                                                     bool useGpu,
                                                     bool loadDataAll)
    : ProtoDataProvider(config, useGpu, loadDataAll) {}
/**
 * Assemble the next batch of at most `size` samples, giving every slot its
 * own sequenceStartPositions.
 *
 * Requires iid data (fatal otherwise) and holds lock_ for the whole call.
 * Only VECTOR_DENSE, VECTOR_SPARSE_NON_VALUE and INDEX slots are supported;
 * the other slot types are fatal. The batch is built in cpuBatch_ and, when
 * useGpu_ is set, copied to gpuBatch_ on HPPL_STREAM_1. Afterwards
 * currentSequenceIndex_ advances by the number of samples taken.
 *
 * @param size   requested number of samples, clamped to what is left.
 * @param batch  output batch, assigned from the CPU or GPU batch.
 * @return batch->getSize(), or 0 when no sample is available.
 */
int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
                                                        DataBatch* batch) {
  CHECK(iidData()) << "ProtoSequenceDataProvider only accepts iid data";
  int64_t numSequences = 0;  // actual number of sequences in the batch

  // the number of sequences scanned, including those skipped because too long
  int64_t numScannedSeqs = 0;
  std::lock_guard<RWLock> guard(lock_);
  size = std::min<int64_t>(getSize() - currentSequenceIndex_, size);
  numScannedSeqs = numSequences = size;
  if (size <= 0) return 0;

  DataBatch& cpuBatch = *cpuBatch_;
  std::vector<Argument>& cpuArguments = cpuBatch.getStreams();
  cpuBatch.setSize(size);
  cpuArguments.resize(header_.slot_defs_size());

  for (int slot = 0; slot < header_.slot_defs_size(); ++slot) {
    SlotDef::SlotType slotType = header_.slot_defs(slot).type();

    // Collect the positions (in the loaded data) of the samples of this batch.
    std::vector<int64_t> dataPos;
    dataPos.reserve(size);
    auto op = [this, &dataPos](int64_t pos) { dataPos.push_back(pos); };
    sampleLoop(op, size);

    // current slot: sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions,
                                  size + 1,
                                  /* useGpu= */ false);

    switch (slotType) {
      case SlotDef::VECTOR_SPARSE_VALUE:
      case SlotDef::VAR_MDIM_DENSE:
      case SlotDef::VAR_MDIM_INDEX: {
        LOG(FATAL) << "ProtoSequenceDataProvider only support"
                   << " VECTOR_DENSE, VECTOR_SPARSE_NON_VALUE and INDEX slots";
        break;
      }
      case SlotDef::VECTOR_SPARSE_NON_VALUE: {
        // copy to IDS, not value
        // pointers used in current slot
        sparse_non_value_t* data = slots_[slot].sparseNonValueData.data();
        int64_t* indexs = slots_[slot].indices.data();
        int64_t* seqs = dataPos.data();

        // current slot: i need size instances. what is the total length?
        // An instance without any feature still occupies one entry (it gets
        // the placeholder id -1 below), hence the extra increment.
        int totalFeatureInCurrentSlot = 0;
        for (int ins = 0; ins < size; ins++) {
          int64_t currInsId = seqs[ins];
          totalFeatureInCurrentSlot +=
              indexs[currInsId + 1] - indexs[currInsId];
          // special: if current instance has NO feature in current slot
          if (indexs[currInsId + 1] == indexs[currInsId]) {
            totalFeatureInCurrentSlot++;
          }
        }
        // done

        // current slot: ids
        IVector::resizeOrCreate(cpuArguments[slot].ids,
                                totalFeatureInCurrentSlot,
                                /* useGpu= */ false);

        // where to write
        int* currPosOfArgumentId = cpuArguments[slot].ids->getData();
        int* currPosOfArgumentSeqStart =
            cpuArguments[slot].sequenceStartPositions->getMutableData(false);
        int allSequenceLength = 0;
        currPosOfArgumentSeqStart[0] = 0;
        // for each instance, copy data and fill sequence positions
        for (int instance = 0; instance < size; instance++) {
          int64_t currInstanceId = seqs[instance];
          int64_t currInstanceLength =
              indexs[currInstanceId + 1] - indexs[currInstanceId];
          sparse_non_value_t* currInstanceData = data + indexs[currInstanceId];
          // write sequenceStartPositions
          allSequenceLength += currInstanceLength;
          currPosOfArgumentSeqStart[instance + 1] = allSequenceLength;
          // copy features
          for (int featCopier = 0; featCopier < currInstanceLength;
               featCopier++) {
            currPosOfArgumentId[featCopier] = currInstanceData[featCopier].col;
          }
          // advance the write cursor past the ids just copied
          currPosOfArgumentId += currInstanceLength;
          // special: if current instance has NO feature in current slot
          if (currInstanceLength == 0) {
            // emit a single placeholder id of -1 so the sequence has length 1
            allSequenceLength++;
            currPosOfArgumentSeqStart[instance + 1] = allSequenceLength;
            currPosOfArgumentId[0] = -1;
            currPosOfArgumentId++;
          }
          // done
        }
        if (slots_[slot].subIndices.size()) {
          // The slot carries sub-sequence information: collect the
          // sub-sequence indices of this batch and build
          // subSequenceStartPositions from their lengths.
          std::vector<int64_t> dataSubPos;
          auto op = [this, &dataSubPos](int64_t pos) {
            dataSubPos.push_back(pos);
          };
          int subSize = subSampleLoop(op, size, slot);
          ICpuGpuVector::resizeOrCreate(
              cpuArguments[slot].subSequenceStartPositions, subSize + 1, false);
          int* currPosOfArgumentSubSeqStart =
              cpuArguments[slot].subSequenceStartPositions->getMutableData(
                  false);
          int64_t* subSeqs = dataSubPos.data();
          int64_t* subIndexs = slots_[slot].subIndices.data();
          int allSubSequenceLength = 0;
          currPosOfArgumentSubSeqStart[0] = 0;
          // for each instance, compute sub-sequence number
          for (int instance = 0; instance < subSize; instance++) {
            int64_t currSubInstanceId = subSeqs[instance];
            int64_t currSubInstanceLength =
                subIndexs[currSubInstanceId + 1] - subIndexs[currSubInstanceId];
            // write subSequenceStartPositions
            allSubSequenceLength += currSubInstanceLength;
            currPosOfArgumentSubSeqStart[instance + 1] = allSubSequenceLength;
            // special: if current instance has NO feature in current slot
            if (currSubInstanceLength == 0) {
              allSubSequenceLength++;
              currPosOfArgumentSubSeqStart[instance + 1] = allSubSequenceLength;
            }
          }
          cpuArguments[slot].checkSubset();
        }
        break;
      }
      case SlotDef::INDEX: {
        // label slot
        IVector::resizeOrCreate(cpuArguments[slot].ids,
                                size,
                                /* useGpu= */ false);
        // fill labels
        int* buf = cpuArguments[slot].ids->getData();
        for (int i = 0; i < size; ++i) {
          buf[i] = slots_[slot].indexData[dataPos[i]];
        }
        // label HAS sequence structure
        cpuArguments[slot].sequenceStartPositions->fillSequence(false);
        break;
      }
      case SlotDef::VECTOR_DENSE: {
        // copy values
        size_t dim = header_.slot_defs(slot).dim();
        Matrix::resizeOrCreate(cpuArguments[slot].value,
                               size,
                               dim,
                               false,   // trans = false
                               false);  // useGpu = false
        real* buf = cpuArguments[slot].value->getData();
        for (int i = 0; i < size; ++i) {
          memcpy(buf + i * dim,
                 slots_[slot].denseData.data() + dataPos[i] * dim,
                 sizeof(real) * dim);
        }
        // sequence structure
        cpuArguments[slot].sequenceStartPositions->fillSequence(false);
        break;
      }
      default: { LOG(FATAL) << "should not reach here"; }
    }
  }

  if (useGpu_) {
    std::vector<Argument>& cpuArguments = cpuBatch.getStreams();
    DataBatch& gpuBatch = *gpuBatch_;
    std::vector<Argument>& gpuArguments = gpuBatch.getStreams();
    gpuArguments.resize(cpuArguments.size());
    gpuBatch.setSize(size);
    for (size_t i = 0; i < cpuArguments.size(); ++i) {
      gpuArguments[i].resizeAndCopyFrom(
          cpuArguments[i], useGpu_, HPPL_STREAM_1);
    }
    // Wait for the copies issued on HPPL_STREAM_1 before publishing.
    hl_stream_synchronize(HPPL_STREAM_1);
    *batch = gpuBatch;
  } else {
    *batch = cpuBatch;
  }

  currentSequenceIndex_ += numScannedSeqs;
  return batch->getSize();
}
}
// namespace paddle
paddle/gserver/dataproviders/ProtoDataProvider.h
已删除
100644 → 0
浏览文件 @
571ef90c
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "DataFormat.pb.h"
#include "paddle/utils/Stat.h"
#include "DataProvider.h"
#include "ProtoReader.h"
namespace
paddle
{
/**
 * @brief Data provider that reads samples from protobuf data files, where
 *        every sample is one proto message.
 *
 * DataSample is defined in DataFormat.proto.
 *
 * Layout of a data file:
 *
 *   header
 *   sample1
 *   sample2
 *   ...
 *   sampleN
 *
 * @note In the data file every message is prefixed with its length.
 *       Reading/writing of the protobuf messages is implemented in
 *       ProtoReader.h.
 */
class ProtoDataProvider : public DataProvider {
public:
  ProtoDataProvider(const DataConfig& config,
                    bool useGpu,
                    bool loadDataAll = true);

  virtual void reset();

  /**
   * @brief Number of samples, scaled down by usageRatio_ when that ratio is
   *        below 1.
   * @note this size includes the sequences which are skipped because they
   *       are longer than the batch size.
   */
  virtual int64_t getSize() {
    const int64_t total = sampleNums_;
    if (usageRatio_ < 1.0f) {
      return static_cast<int64_t>(total * usageRatio_);
    }
    return total;
  }

  virtual void shuffle();

  void loadData(const std::vector<std::string>& fileList);

  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);

protected:
  /**
   * @brief load protobuf data from a list of files
   * @param[in]  fileName  name of a file which contains a list of file names
   */
  void loadData(const std::string& fileName);

  /**
   * @brief load protobuf data from one file
   * @param[in]  fileName  data file name
   */
  void loadDataFile(const std::string& fileName);

  /**
   * @brief check the data header of each data sample
   * @param[in]  header  data header read from protobuf data
   */
  void checkDataHeader(const DataHeader& header);

  /**
   * @brief fill protobuf data into slots_, the in-memory vector of ProtoSlot
   * @param[in]  sample  data sample read from protobuf data
   */
  void fillSlots(const DataSample& sample);

  /**
   * @brief true when no sequence start positions are recorded, i.e. each
   *        sample is one sequence, independent of other samples.
   */
  bool iidData() const { return sequenceStartPositions_.empty(); }

  /**
   * @brief check that sample is consistent with header_
   */
  void checkSample(const DataSample& sample);

  template <class Op>
  int64_t sequenceLoop(Op op, int64_t size);

  template <class Op>
  int64_t sampleLoop(Op op, int64_t size);

  template <class Op>
  int64_t subSampleLoop(Op op, int64_t size, int slot);

  void showDataStats();

protected:
  // Variable-sized dense payload together with its dimensions.
  struct ProtoVarSlot {
    std::vector<real> data;
    std::vector<int> dims;
  };

  // In-memory storage of one slot; which containers are populated
  // presumably depends on `type` (filled by fillSlots, not visible here).
  struct ProtoSlot {
    SlotDef::SlotType type;
    int dim;
    std::vector<int> indexData;
    std::vector<real> denseData;
    std::vector<sparse_non_value_t> sparseNonValueData;
    std::vector<sparse_float_value_t> sparseFloatValueData;
    std::vector<int64_t> indices;
    std::vector<int64_t> subIndices;

    std::vector<ProtoVarSlot> varDenseData;
    std::vector<std::vector<int>> varIndices;
    std::vector<std::string> strData;
  };

  DataHeader header_;
  int numVecSlots_;

  std::vector<ProtoSlot> slots_;
  size_t sampleNums_;

  /**
   * The starting position of each sequence in samples.
   * The last element should be num of samples.
   * If empty, each sample is one sequence.
   */
  std::vector<size_t> sequenceStartPositions_;

  int64_t currentSequenceIndex_;

  // The size should be the number of sequences.
  std::vector<size_t> shuffledSequenceIds_;

  ThreadLocalD<DataBatch> cpuBatch_;
  ThreadLocalD<DataBatch> gpuBatch_;

  RWLock lock_;
  // stats for the number of non-zero entries
  std::vector<StatPtr> nnzStats_;
};
/**
 * @brief Proto data provider for a special case: instances should contain
 *        sparse-non-value slots and a label.
 *
 * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE.
 */
class ProtoSequenceDataProvider : public ProtoDataProvider {
public:
  ProtoSequenceDataProvider(const DataConfig& config,
                            bool useGpu,
                            bool loadDataAll = true);

  ~ProtoSequenceDataProvider() {}

  // Replaces the batching logic inherited from ProtoDataProvider.
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
};
}
// namespace paddle
paddle/gserver/layers/DotProdLayer.cpp
0 → 100644
浏览文件 @
5ee63bb6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace
paddle
{
/**
 * @brief Layer computing the row-wise dot product of its two inputs.
 *
 * Input1: vector (batchSize * dim)
 * Input2: vector (batchSize * dim)
 * Output: a matrix (batchSize * 1)
 */
class DotProdLayer : public Layer {
public:
  explicit DotProdLayer(const LayerConfig& config) : Layer(config) {}

  ~DotProdLayer() {}

  // Validates that there are exactly two inputs and that the layer size is 1.
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  // Writes one dot product per batch row into the output value.
  void forward(PassType passType) override;

  // Accumulates the gradients of both inputs from the output gradient.
  void backward(const UpdateCallback& callback = nullptr) override;
};
REGISTER_LAYER
(
dot_prod
,
DotProdLayer
);
/**
 * @brief Initialise the layer and validate its configuration.
 *
 * The result of Layer::init is now propagated: previously it was discarded,
 * so a failed base initialisation was still reported as success. This also
 * matches how MKLDNNConcatLayer::init treats its base-class init.
 *
 * @param layerMap      forwarded unchanged to Layer::init
 * @param parameterMap  forwarded unchanged to Layer::init
 * @return false if the base class failed to initialise, true otherwise.
 *         A wrong number of inputs or an output size other than 1 trips a
 *         CHECK instead of returning.
 */
bool DotProdLayer::init(const LayerMap& layerMap,
                        const ParameterMap& parameterMap) {
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  // The dot product is defined for exactly two operands.
  CHECK_EQ(inputLayers_.size(), 2U);
  CHECK_EQ(1UL, getSize())
      << "The output dimensionality of this layer should be fixed to 1.";

  return true;
}
/**
 * @brief Forward pass: one dot product per batch row.
 *
 * Both inputs must have the same height (batch size) and width; the output
 * is reserved as a (batchSize x 1) matrix.
 */
void DotProdLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr lhs = getInputValue(0);
  MatrixPtr rhs = getInputValue(1);

  size_t batchSize = lhs->getHeight();
  CHECK_EQ(rhs->getHeight(), batchSize);
  CHECK_EQ(lhs->getWidth(), rhs->getWidth());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, 1);
  }

  MatrixPtr result = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwDotProdTimer", getName().c_str());
    // NOTE(review): the trailing (1, 0) are presumably the scale applied to
    // the new row sums and to the existing output -- confirm against Matrix.
    result->sumOfProducts(*lhs, *rhs, 1, 0);
  }
}
void
DotProdLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
MatrixPtr
inV0
=
getInputValue
(
0
);
MatrixPtr
inV1
=
getInputValue
(
1
);
MatrixPtr
outG
=
getOutputGrad
();
MatrixPtr
inG0
=
getInputGrad
(
0
);
MatrixPtr
inG1
=
getInputGrad
(
1
);
{
REGISTER_TIMER_INFO
(
"BwDotProdTimer"
,
getName
().
c_str
());
if
(
inG0
)
{
inG0
->
addRowScale
(
0
,
*
inV1
,
*
outG
);
}
if
(
inG1
)
{
inG1
->
addRowScale
(
0
,
*
inV0
,
*
outG
);
}
}
}
}
// namespace paddle
paddle/gserver/layers/MKLDNNConcatLayer.cpp
0 → 100644
浏览文件 @
5ee63bb6
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNConcatLayer.h"
using
namespace
mkldnn
;
// NOLINT
typedef
memory
::
format
format
;
namespace
paddle
{
REGISTER_LAYER
(
mkldnn_concat
,
MKLDNNConcatLayer
);
/**
 * @brief Initialise the concat layer.
 *
 * @return false when MKLDNNLayer::init fails, true otherwise. Fewer than two
 *         inputs or the presence of a bias parameter trips a CHECK.
 */
bool MKLDNNConcatLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  const bool baseReady = MKLDNNLayer::init(layerMap, parameterMap);
  if (!baseReady) {
    return false;
  }

  // Concatenation needs at least two inputs and never carries a bias.
  CHECK_GT(inputLayers_.size(), 1UL);
  CHECK(!biasParameter_);
  return true;
}
/**
 * @brief Derive batch size, per-input channel counts and output sizes.
 *
 * Input 0 defines bs/ih/iw; every further input must agree on all three.
 * Each input's channel count is its layer size divided by ih and iw and is
 * stored in channels_. The summed channel count is written to oc_ (the `oc`
 * argument is passed by value and is not used).
 */
void MKLDNNConcatLayer::reshape(
    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
  reshapeInput(bs, ih, iw);
  ic = inputLayers_[0]->getSize() / ih / iw;
  // The division above must have been exact.
  CHECK_EQ(static_cast<size_t>(ic) * ih * iw, inputLayers_[0]->getSize());
  CHECK_EQ(inputElemenCnt_, static_cast<size_t>(bs) * ic * ih * iw);

  CHECK_GT(inputLayers_.size(), 1UL);
  channels_.resize(inputLayers_.size());
  channels_[0] = ic;

  // need change the output channel, so use oc_ instead
  // TODO(TJ): change API, use &oc
  oc_ = ic;
  for (size_t idx = 1; idx < inputLayers_.size(); ++idx) {
    int curBatch;
    int curHeight;
    int curWidth;
    reshapeInput(curBatch, curHeight, curWidth, idx);
    CHECK_EQ(bs, curBatch);
    CHECK_EQ(ih, curHeight);
    CHECK_EQ(iw, curWidth);

    channels_[idx] = inputLayers_[idx]->getSize() / curHeight / curWidth;
    CHECK_EQ(static_cast<size_t>(channels_[idx]) * curHeight * curWidth,
             inputLayers_[idx]->getSize());
    oc_ += channels_[idx];
  }

  // Concatenation does not change the spatial size.
  oh = ih;
  ow = iw;
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc_ * oh * ow);
}
/**
 * @brief Rebuild the forward buffers, primitive descriptor and pipeline.
 *
 * `in` is set to the first input value; `wgt` and `bias` are left untouched.
 */
void MKLDNNConcatLayer::resetFwd(std::vector<primitive>& pipeline,
                                 MKLDNNMatrixPtr& in,
                                 MKLDNNMatrixPtr& wgt,
                                 MKLDNNMatrixPtr& bias,
                                 MKLDNNMatrixPtr& out) {
  resetFwdBuffers(inVals_, out);
  in = inVals_[0];

  std::shared_ptr<concat::primitive_desc> concatPD;
  resetFwdPD(concatPD, inVals_, out);

  resetFwdPipeline(pipeline, concatPD, inVals_, out);
}
/**
 * @brief Rebuild the backward buffers and pipeline.
 *
 * `in` is set to the first input gradient; `wgt` and `bias` are left
 * untouched.
 */
void MKLDNNConcatLayer::resetBwd(std::vector<primitive>& pipeline,
                                 MKLDNNMatrixPtr& in,
                                 MKLDNNMatrixPtr& wgt,
                                 MKLDNNMatrixPtr& bias,
                                 MKLDNNMatrixPtr& out) {
  resetBwdBuffers(inGrads_, out);
  in = inGrads_[0];

  resetBwdPipeline(pipeline, bwds_, inGrads_, out);
}
/**
 * @brief Reset every input value and create the output value.
 *
 * Each input is checked against bs_, its entry in channels_ and, when it has
 * more than two dims, ih_/iw_. The output format is picked from the formats
 * seen on the inputs: nChw16c, then nChw8c (each only when oc_ is a multiple
 * of the block size), then nc, otherwise nchw.
 */
void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
                                        MKLDNNMatrixPtr& out) {
  inputs.resize(inputLayers_.size());

  bool sawNC = false;
  bool saw8c = false;
  bool saw16c = false;
  for (size_t idx = 0; idx < inputs.size(); ++idx) {
    // resetInValue will use ic_ so temporary change as current input's channel
    // TODO(TJ): change ic_ as vector then can remove channels_
    ic_ = channels_[idx];
    resetInValue(inputs[idx], nullptr, idx);
    CHECK(inputs[idx]);

    auto shape = inputs[idx]->getDims();
    // inputs format can be different, but ndims must equal
    CHECK(idx == 0 || shape.size() == inputs[0]->getDims().size());
    CHECK_EQ(bs_, shape[0]);
    CHECK_EQ(channels_[idx], shape[1]);
    if (shape.size() > 2) {
      CHECK_EQ(ih_, shape[2]);
      CHECK_EQ(iw_, shape[3]);
    }

    // Remember which layouts occur among the inputs.
    const auto fmt = inputs[idx]->getFormat();
    sawNC = sawNC || fmt == format::nc;
    saw8c = saw8c || fmt == format::nChw8c;
    saw16c = saw16c || fmt == format::nChw16c;
  }
  // change back, ic_ always save the input 0 size
  ic_ = channels_[0];

  format outFmt = format::nchw;
  if (saw16c && oc_ % 16 == 0) {
    outFmt = format::nChw16c;
  } else if (saw8c && oc_ % 8 == 0) {
    outFmt = format::nChw8c;
  } else if (sawNC) {
    CHECK(oh_ == 1 && ow_ == 1);
    outFmt = format::nc;
  }

  memory::dims outDims;
  if (sawNC) {
    outDims = {bs_, oc_};
  } else {
    outDims = {bs_, oc_, oh_, ow_};
  }

  auto outPD = MKLDNNMatrix::createPrimitiveDesc(outDims, outFmt, engine_);
  resetOutValue(out, outPD);
}
/**
 * @brief Create the concat primitive descriptor.
 *
 * Collects the memory primitive descriptor of every input, builds the concat
 * descriptor along axis_ for the output's memory descriptor and checks that
 * its destination descriptor equals the one of `out`.
 */
void MKLDNNConcatLayer::resetFwdPD(
    std::shared_ptr<concat::primitive_desc>& pd,
    std::vector<MKLDNNMatrixPtr>& inputs,
    MKLDNNMatrixPtr out) {
  std::vector<memory::primitive_desc> srcPDs;
  for (const auto& src : inputs) {
    srcPDs.push_back(src->getPrimitiveDesc());
  }

  CHECK(out);
  pd.reset(new concat::primitive_desc(out->getMemoryDesc(), axis_, srcPDs));
  CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc());
}
/**
 * @brief Create the forward concat primitive (stored in fwd_) and append it
 *        to the pipeline.
 */
void MKLDNNConcatLayer::resetFwdPipeline(
    std::vector<primitive>& pipeline,
    std::shared_ptr<concat::primitive_desc>& pd,
    std::vector<MKLDNNMatrixPtr>& inputs,
    MKLDNNMatrixPtr& out) {
  // Every input matrix becomes one source of the concat primitive.
  std::vector<primitive::at> srcs;
  for (const auto& src : inputs) {
    srcs.push_back(*src);
  }

  fwd_.reset(new concat(*pd, srcs, *out));
  pipeline.push_back(*fwd_);
}
void
MKLDNNConcatLayer
::
resetBwdBuffers
(
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
CHECK
(
out
);
inputs
.
resize
(
inputLayers_
.
size
());
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
CHECK
(
inVals_
[
i
]);
// resetInGrad will use inVal_
// TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_
inVal_
=
inVals_
[
i
];
resetInGrad
(
inputs
[
i
],
inVals_
[
i
]
->
getPrimitiveDesc
(),
i
);
CHECK_PRIMITIVE_DESC_EQ
(
inputs
[
i
],
inVals_
[
i
]
->
getPrimitiveDesc
());
}
// change back, inVal_ always save the input 0
inVal_
=
inVals_
[
0
];
}
/**
 * @brief Create one reorder primitive per input gradient and append them to
 *        the pipeline.
 *
 * For input i a view of `out` with that input's dims is taken at the current
 * offsets and reordered into inputs[i]; afterwards the offset along axis_
 * advances by channels_[i].
 */
void MKLDNNConcatLayer::resetBwdPipeline(
    std::vector<mkldnn::primitive>& pipeline,
    std::vector<std::shared_ptr<mkldnn::primitive>>& prims,
    std::vector<MKLDNNMatrixPtr>& inputs,
    MKLDNNMatrixPtr& out) {
  // reset the backward primitives
  memory::dims offsets(4, 0);
  prims.resize(inputs.size());
  CHECK_EQ(inputs.size(), channels_.size());

  for (size_t idx = 0; idx < inputs.size(); ++idx) {
    view::primitive_desc viewPD(
        out->getPrimitiveDesc(), inputs[idx]->getDims(), offsets);
    reorder::primitive_desc bwdPD(viewPD.dst_primitive_desc(),
                                  inputs[idx]->getPrimitiveDesc());
    prims[idx].reset(new reorder(bwdPD, *out, *(inputs[idx])));

    // push to pipeline
    pipeline.push_back(*prims[idx]);

    // The next input starts where this one ends along the concat axis.
    offsets[axis_] += channels_[idx];
  }
}
}
// namespace paddle
paddle/gserver/layers/MKLDNNConcatLayer.h
0 → 100644
浏览文件 @
5ee63bb6
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "MKLDNNLayer.h"
#include "mkldnn.hpp"
namespace
paddle
{
/**
* @brief A subclass of MKLDNNLayer Concatenate layer.
*
* The config file api is mkldnn_concat
*/
class
MKLDNNConcatLayer
:
public
MKLDNNLayer
{
protected:
std
::
vector
<
MKLDNNMatrixPtr
>
inVals_
;
std
::
vector
<
MKLDNNMatrixPtr
>
inGrads_
;
std
::
vector
<
std
::
shared_ptr
<
mkldnn
::
primitive
>>
bwds_
;
// input channel numbers
std
::
vector
<
int
>
channels_
;
// concat_dimension in MKLDNN
// if axis_ == 0, concat batchsize
// if axis_ == 1, concat channel (default)
int
axis_
;
public:
explicit
MKLDNNConcatLayer
(
const
LayerConfig
&
config
)
:
MKLDNNLayer
(
config
),
axis_
(
1
)
{}
~
MKLDNNConcatLayer
()
{}
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
override
;
void
reshape
(
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
override
;
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
printSizeInfo
()
override
{
CHECK_EQ
(
channels_
.
size
(),
inputLayers_
.
size
());
for
(
size_t
i
=
0
;
i
<
channels_
.
size
();
++
i
)
{
VLOG
(
MKLDNN_SIZES
)
<<
"Input "
<<
i
<<
", "
<<
inputLayers_
[
i
]
->
getName
()
<<
": "
<<
bs_
<<
", "
<<
channels_
[
i
]
<<
", "
<<
ih_
<<
", "
<<
iw_
;
}
VLOG
(
MKLDNN_SIZES
)
<<
"Output: "
<<
bs_
<<
", "
<<
oc_
<<
", "
<<
oh_
<<
", "
<<
ow_
;
}
void
printValueFormat
()
override
{
for
(
size_t
i
=
0
;
i
<
inVals_
.
size
();
++
i
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Input "
<<
i
<<
", "
<<
inputLayers_
[
i
]
->
getName
()
<<
": "
<<
inVals_
[
i
]
->
getFormat
()
<<
" >>>"
;
}
if
(
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outVal_
->
getFormat
()
<<
" >>> "
;
}
if
(
extOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutVal_
->
getFormat
();
}
}
void
printGradFormat
()
override
{
if
(
extOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutGrad_
->
getFormat
();
}
if
(
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outGrad_
->
getFormat
()
<<
" <<< "
;
}
for
(
size_t
i
=
0
;
i
<
inGrads_
.
size
();
++
i
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Input "
<<
i
<<
", "
<<
inputLayers_
[
i
]
->
getName
()
<<
": "
<<
inGrads_
[
i
]
->
getFormat
()
<<
"<<<"
;
}
}
protected:
/**
* Forward functions: reset buffers(inputs, output, bias),
* reset primitive descriptor,
* reset pipeline.
*/
void
resetFwdBuffers
(
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
mkldnn
::
concat
::
primitive_desc
>&
pd
,
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
out
);
void
resetFwdPipeline
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
std
::
shared_ptr
<
mkldnn
::
concat
::
primitive_desc
>&
pd
,
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
&
out
);
/**
* Backward functions: reset buffers(inputs, output, bias)
* reset primitives and pipeline
*/
void
resetBwdBuffers
(
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
&
out
);
void
resetBwdPipeline
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
std
::
vector
<
std
::
shared_ptr
<
mkldnn
::
primitive
>>&
prims
,
std
::
vector
<
MKLDNNMatrixPtr
>&
inputs
,
MKLDNNMatrixPtr
&
out
);
};
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.cpp
浏览文件 @
5ee63bb6
...
...
@@ -21,8 +21,8 @@ namespace paddle {
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"M
kldnn
Layers only support use_mkldnn."
<<
"Please set WITH_MKL
DNN
=ON "
CHECK
(
FLAGS_use_mkldnn
)
<<
"M
KLDNN
Layers only support use_mkldnn."
<<
"Please set WITH_MKL=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
...
...
@@ -138,8 +138,11 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
}
}
void
MKLDNNLayer
::
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
void
MKLDNNLayer
::
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
,
size_t
inputIdx
)
{
const
Argument
&
input
=
inputLayers_
[
inputIdx
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
5ee63bb6
...
...
@@ -178,7 +178,10 @@ protected:
/**
* reshape the input image sizes and input batchsize
*/
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
);
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
,
size_t
inputIdx
=
0
);
/**
* reshape output image sizes
...
...
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
5ee63bb6
...
...
@@ -29,7 +29,7 @@ gserver_test(test_KmaxSeqScore)
gserver_test
(
test_Expand
)
gserver_test
(
test_MaxPoolingWithMaskOutput
)
########## test_M
kldnn
layers and activations ##########
########## test_M
KLDNN
layers and activations ##########
if
(
WITH_MKLDNN
)
add_unittest_without_exec
(
test_MKLDNN
test_MKLDNN.cpp
...
...
@@ -62,17 +62,6 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
endif
()
if
(
NOT MOBILE_INFERENCE
)
################### test_ProtoDataProvider ############
add_unittest_without_exec
(
test_ProtoDataProvider
test_ProtoDataProvider.cpp
)
# test_ProtoDataProvider will mkdir as same name,
# so if WORKING_DIRECTORY is default directory, then
# mkdir will get error.
add_test
(
NAME test_ProtoDataProvider
COMMAND
${
CMAKE_CURRENT_BINARY_DIR
}
/test_ProtoDataProvider
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle
)
################## test_Evaluator #######################
add_unittest
(
test_Evaluator
test_Evaluator.cpp
)
...
...
@@ -110,3 +99,24 @@ add_test(NAME test_PyDataProvider2
COMMAND .set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/paddle/gserver/tests:
${
PADDLE_SOURCE_DIR
}
/python
${
CMAKE_CURRENT_BINARY_DIR
}
/test_PyDataProvider2
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle
)
################# test_CompareSparse ##################
add_unittest_without_exec
(
test_CompareSparse
test_CompareSparse.cpp
)
if
(
NOT ON_TRAVIS
)
add_test
(
NAME test_CompareSparse
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python:
${
PADDLE_SOURCE_DIR
}
/paddle/gserver/tests
./.set_port.sh -p port -n 6
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareSparse
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
endif
()
################ test_CompareTwoNets ######################
add_unittest_without_exec
(
test_CompareTwoNets
test_CompareTwoNets.cpp
)
add_test
(
NAME test_CompareTwoNets
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python:
${
PADDLE_SOURCE_DIR
}
/paddle/gserver/tests
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareTwoNets
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
paddle/gserver/tests/MKLDNNTester.h
浏览文件 @
5ee63bb6
...
...
@@ -23,7 +23,7 @@ limitations under the License. */
namespace
paddle
{
/**
* @brief test the functionality of M
kldnnlayer
s
* @brief test the functionality of M
KLDNNlayers and MKLDNNActivation
s
* refer to paddle original function
*/
class
MKLDNNTester
{
...
...
paddle/gserver/tests/proto_files.txt
已删除
100644 → 0
浏览文件 @
571ef90c
./test_ProtoDataProvider/data1.bin
./test_ProtoDataProvider/data2.bin
paddle/gserver/tests/proto_files_compressed.txt
已删除
100644 → 0
浏览文件 @
571ef90c
./test_ProtoDataProvider/data1.bin.gz
./test_ProtoDataProvider/data2.bin.gz
paddle/
trainer/tests/sample_trainer_config_opt_a
.conf
→
paddle/
gserver/tests/sequence_lstm
.conf
浏览文件 @
5ee63bb6
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
@@ -14,27 +15,50 @@
from
paddle
.
trainer_config_helpers
import
*
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
1000
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
######################## data source ################################
dict_path
=
'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file
=
dict
()
for
line_count
,
line
in
enumerate
(
open
(
dict_path
,
"r"
)):
dict_file
[
line
.
strip
()] =
line_count
fc1
=
fc_layer
(
input
=
data
,
size
=
800
,
bias_attr
=
True
,
act
=
SigmoidActivation
())
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/train.list'
,
test_list
=
None
,
module
=
'sequenceGen'
,
obj
=
'process'
,
args
={
"dict_file"
:
dict_file
})
fc2
=
fc_layer
(
input
=
fc1
,
size
=
800
,
bias_attr
=
True
,
act
=
SigmoidActivation
())
settings
(
batch_size
=
5
)
######################## network configure ################################
dict_dim
=
len
(
open
(
dict_path
,
'r'
).
readlines
())
word_dim
=
128
hidden_dim
=
256
label_dim
=
3
sparse_update
=
get_config_arg
(
"sparse_update"
,
bool
,
False
)
output
=
fc_layer
(
input
=[
fc1
,
fc2
],
size
=
10
,
bias_attr
=
True
,
act
=
SoftmaxActivation
())
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
lbl
=
data_layer
(
name
=
"label"
,
size
=
1
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
,
param_attr
=
ParamAttr
(
sparse_update
=
sparse_update
))
cost
=
classification_cost
(
input
=
output
,
label
=
lbl
)
outputs
(
cost
)
with
mixed_layer
(
size
=
hidden_dim
*
4
)
as
lstm_input
:
lstm_input
+=
full_matrix_projection
(
input
=
emb
)
lstm
=
lstmemory
(
input
=
lstm_input
,
act
=
TanhActivation
(),
gate_act
=
SigmoidActivation
(),
state_act
=
TanhActivation
())
lstm_last
=
last_seq
(
input
=
lstm
)
with
mixed_layer
(
size
=
label_dim
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
as
output
:
output
+=
full_matrix_projection
(
input
=
lstm_last
)
outputs
(
classification_cost
(
input
=
output
,
label
=
data_layer
(
name
=
"label"
,
size
=
1
)))
paddle/
trainer/tests/sample_trainer_config_opt_b.conf
→
paddle/
gserver/tests/sequence_recurrent.py
浏览文件 @
5ee63bb6
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
@@ -14,27 +15,42 @@
from
paddle.trainer_config_helpers
import
*
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
1000
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
######################## data source ################################
dict_path
=
'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file
=
dict
()
for
line_count
,
line
in
enumerate
(
open
(
dict_path
,
"r"
)):
dict_file
[
line
.
strip
()]
=
line_count
fc1
=
fc_layer
(
input
=
data
,
size
=
800
,
bias_attr
=
True
,
act
=
SigmoidActivation
())
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/train.list'
,
test_list
=
None
,
module
=
'sequenceGen'
,
obj
=
'process'
,
args
=
{
"dict_file"
:
dict_file
})
fc2
=
fc_layer
(
input
=
fc1
,
size
=
800
,
bias_attr
=
True
,
act
=
SigmoidActivation
())
settings
(
batch_size
=
5
)
######################## network configure ################################
dict_dim
=
len
(
open
(
dict_path
,
'r'
).
readlines
())
word_dim
=
128
hidden_dim
=
128
label_dim
=
3
output
=
fc_layer
(
input
=[
fc1
,
fc2
],
size
=
10
,
bias_attr
=
True
,
act
=
SoftmaxActivation
())
# This config is designed to be equivalent with sequence_recurrent_group.py
lbl
=
data_layer
(
name
=
"label"
,
size
=
1
)
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
cost
=
classification_cost
(
input
=
output
,
label
=
lbl
)
outputs
(
cost
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
,
param_attr
=
ParamAttr
(
name
=
"emb"
))
recurrent
=
recurrent_layer
(
input
=
emb
,
bias_attr
=
False
,
act
=
SoftmaxActivation
())
recurrent_last
=
last_seq
(
input
=
recurrent
)
with
mixed_layer
(
size
=
label_dim
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
as
output
:
output
+=
full_matrix_projection
(
input
=
recurrent_last
)
outputs
(
classification_cost
(
input
=
output
,
label
=
data_layer
(
name
=
"label"
,
size
=
1
)))
paddle/gserver/tests/sequence_recurrent_group.py
0 → 100644
浏览文件 @
5ee63bb6
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer_config_helpers
import
*
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'

# Read the dictionary once inside a context manager so the file handle is
# closed deterministically; the lines are reused below for dict_dim.
with open(dict_path, "r") as dict_fp:
    dict_lines = dict_fp.readlines()

# Map every stripped dictionary line to its zero-based line number.
dict_file = dict()
for line_count, line in enumerate(dict_lines):
    dict_file[line.strip()] = line_count

define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})

settings(batch_size=5)
######################## network configure ################################
# Number of lines in the dictionary file (not the number of distinct keys).
dict_dim = len(dict_lines)
word_dim = 128
hidden_dim = 128
label_dim = 3

# This config is designed to be equivalent with sequence_recurrent.py

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(
    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
def step(y):
    """Build one step of the recurrent group.

    The step output is a mixed layer named "rnn_state" that sums an identity
    projection of the step input ``y`` and a full-matrix projection of the
    memory that is also named "rnn_state".
    """
    prev_state = memory(name="rnn_state", size=hidden_dim)
    with mixed_layer(
            name="rnn_state",
            size=hidden_dim,
            bias_attr=False,
            act=SoftmaxActivation()) as state:
        state += identity_projection(input=y)
        state += full_matrix_projection(
            input=prev_state,
            param_attr=ParamAttr(name="___recurrent_layer_0__"))
    return state
# Run `step` over the embedded word sequence and keep the last time step.
recurrent = recurrent_group(name="rnn", step=step, input=emb)

recurrent_last = last_seq(input=recurrent)

# Softmax classifier over the last recurrent state.
with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=recurrent_last)

outputs(
    classification_cost(
        input=output,
        label=data_layer(
            name="label", size=1)))
paddle/
train
er/tests/test_CompareSparse.cpp
→
paddle/
gserv
er/tests/test_CompareSparse.cpp
浏览文件 @
5ee63bb6
...
...
@@ -22,8 +22,7 @@ limitations under the License. */
using
namespace
paddle
;
// NOLINT
using
namespace
std
;
// NOLINT
static
const
string
&
configFile1
=
"trainer/tests/sample_trainer_config_compare_sparse.conf"
;
static
const
string
&
configFile1
=
"gserver/tests/sequence_lstm.conf"
;
DECLARE_bool
(
use_gpu
);
DECLARE_string
(
config
);
...
...
paddle/
train
er/tests/test_CompareTwoNets.cpp
→
paddle/
gserv
er/tests/test_CompareTwoNets.cpp
浏览文件 @
5ee63bb6
...
...
@@ -30,8 +30,6 @@ DECLARE_bool(use_gpu);
DECLARE_string
(
config
);
DECLARE_string
(
nics
);
DEFINE_string
(
config_file_a
,
""
,
"config of one network to compare"
);
DEFINE_string
(
config_file_b
,
""
,
"config of another network to compare"
);
DEFINE_bool
(
need_high_accuracy
,
false
,
"whether need to run in double accuracy"
);
...
...
@@ -42,6 +40,10 @@ DEFINE_double(
DECLARE_bool
(
thread_local_rand_use_global_seed
);
DECLARE_int32
(
seed
);
static
const
string
&
config_file_a
=
"gserver/tests/sequence_recurrent.py"
;
static
const
string
&
config_file_b
=
"gserver/tests/sequence_recurrent_group.py"
;
struct
ComData
{
vector
<
Argument
>
outArgs
;
vector
<
ParameterPtr
>
parameters
;
...
...
@@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) {
DataBatch
dataBatch
;
int32_t
batchSize
=
trainer
.
getConfig
().
opt_config
().
batch_size
();
trainer
.
getDataProvider
()
->
reset
();
trainer
.
getDataProvider
()
->
setSkipShuffle
();
trainer
.
getDataProvider
()
->
getNextBatch
(
batchSize
,
&
dataBatch
);
...
...
@@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) {
TEST
(
Trainer
,
create
)
{
ComData
dataA
;
calcGradient
(
dataA
,
FLAGS_
config_file_a
);
calcGradient
(
dataA
,
config_file_a
);
LOG
(
INFO
)
<<
"
\n\n
forwardBackward of Network A is finished
\n\n
"
;
ComData
dataB
;
calcGradient
(
dataB
,
FLAGS_
config_file_b
);
calcGradient
(
dataB
,
config_file_b
);
LOG
(
INFO
)
<<
"
\n\n
forwardBackward of the Network B is finished
\n\n
"
;
compareGradient
(
dataA
,
dataB
);
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
5ee63bb6
...
...
@@ -1081,6 +1081,21 @@ TEST(Layer, InterpolationLayer) {
}
}
// Gradient check for the dot_prod layer: two dense inputs of size 10,
// output size 1, run with useGpu = false and then true.
TEST(Layer, DotProdLayer) {
  TestConfig config;
  config.layerConfig.set_type("dot_prod");
  config.layerConfig.set_size(1);

  // Register both operands; each needs a matching layer input entry.
  for (const char* inputName : {"layer_0", "layer_1"}) {
    config.inputDefs.push_back({INPUT_DATA, inputName, 10, 0});
    config.layerConfig.add_inputs();
  }

  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "dot_prod", 10, false, useGpu);
  }
}
TEST
(
Layer
,
OuterProdLayer
)
{
TestConfig
config
;
config
.
layerConfig
.
set_type
(
"out_prod"
);
...
...
paddle/gserver/tests/test_MKLDNN.cpp
浏览文件 @
5ee63bb6
...
...
@@ -313,6 +313,47 @@ TEST(MKLDNNLayer, AddtoLayer) {
testAddtoLayer
({
4
,
12
,
1
,
1
},
3
);
}
// Fill `cfg` with an "mkldnn_concat" layer description for `inputs`.
//
// Every input must share the batch size, height and width of the first one;
// the configured layer size is (sum of input channels) * height * width.
// One INPUT_DATA definition named "layer_<index>" is registered per input,
// together with its image configuration.
static void getMKLDNNConcatConfig(TestConfig& cfg,
                                  const std::vector<testImageDesc>& inputs) {
  CHECK_GE(inputs.size(), 2) << "at least two inputs";

  const testImageDesc& head = inputs[0];
  int oc = head.ic;
  for (auto it = inputs.begin() + 1; it != inputs.end(); ++it) {
    CHECK_EQ(it->bs, head.bs);
    CHECK_EQ(it->ih, head.ih);
    CHECK_EQ(it->iw, head.iw);
    oc += it->ic;
  }

  cfg.biasSize = 0;
  auto& layer = cfg.layerConfig;
  layer.set_type("mkldnn_concat");
  layer.set_size(oc * head.ih * head.iw);
  layer.set_active_type("relu");

  size_t index = 0;
  for (const testImageDesc& desc : inputs) {
    const std::string name = "layer_" + std::to_string(index++);
    // Widen before multiplying so the flattened size is computed in size_t.
    const size_t flatSize = static_cast<size_t>(desc.ic) * desc.ih * desc.iw;
    cfg.inputDefs.push_back({INPUT_DATA, name, flatSize, 0});

    ImageConfig* image = layer.add_inputs()->mutable_image_conf();
    image->set_channels(desc.ic);
    image->set_img_size_y(desc.ih);
    image->set_img_size(desc.iw);
  }
}
// Build an "mkldnn_concat" test configuration for `inputs` via
// getMKLDNNConcatConfig and hand it to the RUN_MKLDNN_TEST_LAYER macro
// together with the type name "concat" and the first input descriptor.
// NOTE(review): the macro is defined outside this view; it is invoked without
// a trailing semicolon, so it presumably expands to complete statements.
void
testConcatLayer
(
const
std
::
vector
<
testImageDesc
>&
inputs
)
{
TestConfig
dnnConfig
;
getMKLDNNConcatConfig
(
dnnConfig
,
inputs
);
RUN_MKLDNN_TEST_LAYER
(
dnnConfig
,
"concat"
,
inputs
[
0
])
}
// Run the concat layer test on two input sets: three 1x1 inputs and two 8x8
// inputs. Each descriptor is brace-initialised with four integers.
// NOTE(review): testConcatLayer reads the fields bs/ic/ih/iw; the positional
// order used here is presumably {bs, ic, ih, iw} - confirm against the
// testImageDesc declaration.
TEST(MKLDNNLayer, ConcatLayer) {
  const std::vector<testImageDesc> flatInputs = {
      {64, 128, 1, 1}, {64, 32, 1, 1}, {64, 64, 1, 1}};
  testConcatLayer(flatInputs);

  const std::vector<testImageDesc> imageInputs = {{32, 100, 8, 8},
                                                  {32, 10, 8, 8}};
  testConcatLayer(imageInputs);
}
void
testActivation
(
std
::
string
actType
,
const
testImageDesc
&
pm
)
{
// TODO(TJ): remove me when paddle support elu activation
if
(
actType
==
"mkldnn_elu"
)
{
...
...
paddle/gserver/tests/test_ProtoDataProvider.cpp
已删除
100644 → 0
浏览文件 @
571ef90c
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <gtest/gtest.h>
#include "paddle/gserver/dataproviders/ProtoDataProvider.h"
#include "paddle/utils/Util.h"
#include "paddle/testing/TestUtil.h"
using
namespace
std
;
// NOLINT
std
::
vector
<
string
>
protoFiles
{
"./test_ProtoDataProvider/data1.bin"
,
"./test_ProtoDataProvider/data2.bin"
,
};
std
::
vector
<
string
>
protoFilesCompressed
{
"./test_ProtoDataProvider/data1.bin.gz"
,
"./test_ProtoDataProvider/data2.bin.gz"
,
};
const
char
*
kTestDir
=
"./test_ProtoDataProvider"
;
const
char
kProtoFileList
[]
=
"gserver/tests/proto_files.txt"
;
const
char
kProtoFileListCompressed
[]
=
"gserver/tests/proto_files_compressed.txt"
;
const
int
kSpraseMatrixDim
=
1024
;
using
namespace
paddle
;
// NOLINT
// Fill `batch` with randomly generated slots.
//
// batch           cleared, then given a random size in [10, 110) and one
//                 Argument per requested slot.
// numPerSlotType  array indexed by SlotDef::SlotType giving how many slots of
//                 each kind to create (dense, sparse non-value, sparse value,
//                 string, index - in that order).
// iid             when false, random sequence start positions are generated
//                 and attached to every slot; when true the position vectors
//                 stay null.
// useGpu          forwarded to makeRandomSparseMatrix for the sparse slots.
void prepareData(DataBatch* batch,
                 const int* numPerSlotType,
                 bool iid,
                 bool useGpu) {
  batch->clear();
  const int64_t size = uniformRandom(100) + 10;
  batch->setSize(size);

  ICpuGpuVectorPtr seqStarts;
  ICpuGpuVectorPtr subSeqStarts;
  if (!iid) {
    const int numSeqs = uniformRandom(10) + 1;
    seqStarts = ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* seqBuf = seqStarts->getMutableData(false);
    subSeqStarts = ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* subSeqBuf = subSeqStarts->getMutableData(false);

    const int maxLen = 2 * size / numSeqs;
    int64_t cursor = 0;
    for (int s = 0; s < numSeqs; ++s) {
      // Leave at least one sample for each sequence that is still to come.
      const int64_t budget = size - cursor - numSeqs + s;
      const int len = uniformRandom(std::min<int64_t>(maxLen, budget)) + 1;
      seqBuf[s] = cursor;
      subSeqBuf[s] = cursor;
      cursor += len;
      VLOG(1) << " len=" << len;
    }
    // The closing boundary is always the batch size.
    seqBuf[numSeqs] = size;
    subSeqBuf[numSeqs] = size;
  }

  vector<Argument>& streams = batch->getStreams();
  // Attach the shared sequence boundaries and append the slot to the batch.
  auto emit = [&](Argument& slot) {
    slot.sequenceStartPositions = seqStarts;
    streams.push_back(slot);
  };

  for (int n = numPerSlotType[SlotDef::VECTOR_DENSE]; n > 0; --n) {
    const int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    MatrixPtr dense = Matrix::create(size, dim, /* trans= */ false, false);
    dense->randomizeUniform();
    Argument slot;
    slot.value = dense;
    emit(slot);
  }

  for (int n = numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; n > 0; --n) {
    Argument slot;
    slot.value = makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu);
    // Only this slot kind also carries sub-sequence boundaries.
    slot.subSequenceStartPositions = subSeqStarts;
    emit(slot);
  }

  for (int n = numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; n > 0; --n) {
    Argument slot;
    slot.value = makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu);
    emit(slot);
  }

  for (int n = numPerSlotType[SlotDef::STRING]; n > 0; --n) {
    const int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    SVectorPtr strs = std::make_shared<std::vector<std::string>>();
    for (int row = 0; row < size; ++row) {
      strs->push_back(randStr(dim));
    }
    Argument slot;
    slot.strs = strs;
    emit(slot);
  }

  for (int n = numPerSlotType[SlotDef::INDEX]; n > 0; --n) {
    const int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    IVectorPtr ids = IVector::create(size, /* useGpu= */ false);
    int* idBuf = ids->getData();
    for (int row = 0; row < size; ++row) {
      idBuf[row] = uniformRandom(dim);
    }
    Argument slot;
    slot.ids = ids;
    emit(slot);
  }
}
// Return the dimension recorded for the slot held by `arg`:
//   - matrix slot  -> width of the matrix
//   - id slot      -> largest id + 1
//   - string slot  -> 1
// Logs a FATAL error when `arg` holds none of these.
inline int getSlotDim(const Argument& arg) {
  if (arg.value) return arg.value->getWidth();
  if (arg.ids) return arg.ids->getMax() + 1;
  if (arg.strs) return 1;

  LOG(FATAL) << "Invalid argument";
  return 0;
}
// Classify the slot held by `arg`.
//
// A matrix slot is classified by the dynamic type of the matrix: CpuMatrix or
// GpuMatrix is dense; a Cpu/Gpu sparse matrix is VECTOR_SPARSE_NON_VALUE when
// its value type is NO_VALUE and VECTOR_SPARSE_VALUE otherwise. Without a
// matrix, an id vector yields INDEX and a string vector yields STRING.
// Unknown matrix types and empty arguments log a FATAL error.
inline SlotDef::SlotType getSlotType(const Argument& arg) {
  if (arg.value) {
    // Bind to a named reference first so typeid sees a plain lvalue.
    const auto& matrix = *arg.value;
    const auto& actual = typeid(matrix);

    const bool isDense =
        actual == typeid(CpuMatrix) || actual == typeid(GpuMatrix);
    if (isDense) {
      return SlotDef::VECTOR_DENSE;
    }

    if (actual == typeid(CpuSparseMatrix)) {
      auto sparse = std::dynamic_pointer_cast<CpuSparseMatrix>(arg.value);
      return NO_VALUE == sparse->getValueType()
                 ? SlotDef::VECTOR_SPARSE_NON_VALUE
                 : SlotDef::VECTOR_SPARSE_VALUE;
    }

    if (actual == typeid(GpuSparseMatrix)) {
      auto sparse = std::dynamic_pointer_cast<GpuSparseMatrix>(arg.value);
      return NO_VALUE == sparse->getValueType()
                 ? SlotDef::VECTOR_SPARSE_NON_VALUE
                 : SlotDef::VECTOR_SPARSE_VALUE;
    }

    LOG(FATAL) << "Unknown matrix type";
  }

  if (arg.ids) return SlotDef::INDEX;
  if (arg.strs) return SlotDef::STRING;

  LOG(FATAL) << "Invalid argument";
  return SlotDef::VECTOR_DENSE;
}
void
getColRow
(
const
Argument
&
arg
,
int64_t
pos
,
bool
useGpu
,
int
*
colNum
,
const
int
**
rowCols
,
const
real
**
rowValues
)
{
SlotDef
::
SlotType
type
=
getSlotType
(
arg
);
GpuSparseMatrixPtr
matGpu
;
CpuSparseMatrixPtr
matCpu
;
if
(
useGpu
)
{
matGpu
=
dynamic_pointer_cast
<
GpuSparseMatrix
>
(
arg
.
value
);
ASSERT_TRUE
(
matGpu
!=
NULL
);
}
else
{
matCpu
=
dynamic_pointer_cast
<
CpuSparseMatrix
>
(
arg
.
value
);
ASSERT_TRUE
(
matCpu
!=
NULL
);
}
*
colNum
=
useGpu
?
matGpu
->
getColNum
(
pos
)
:
matCpu
->
getColNum
(
pos
);
*
rowCols
=
useGpu
?
matGpu
->
getRowCols
(
pos
)
:
matCpu
->
getRowCols
(
pos
);
if
(
type
==
SlotDef
::
VECTOR_SPARSE_VALUE
)
{
*
rowValues
=
useGpu
?
matGpu
->
getRowValues
(
pos
)
:
matCpu
->
getRowValues
(
pos
);
}
else
{
*
rowValues
=
NULL
;
}
}
// Serialise row `pos` of every slot in `arguments` into `sample`.
//
// arguments    the slots of the source batch.
// pos          row (sample index) to extract from each slot.
// isBeginning  stored in the sample's is_beginning field.
// sample       protobuf message that receives one entry per slot.
// useGpu       forwarded to getColRow to select the sparse matrix type.
//
// Dense, sparse and string slots each add a vector slot; index slots add an
// id slot. Sparse non-value slots additionally add a sub-sequence slot that
// records the slot id and the row's entry count. VAR_MDIM_* slots are not
// supported and log a FATAL error.
void makeSample(const vector<Argument>& arguments,
                int64_t pos,
                bool isBeginning,
                DataSample* sample,
                bool useGpu) {
  sample->set_is_beginning(isBeginning);
  int slotid = 0;
  for (auto& arg : arguments) {
    SlotDef::SlotType type = getSlotType(arg);
    int64_t dim = getSlotDim(arg);
    switch (type) {
      case SlotDef::VECTOR_DENSE: {
        VectorSlot* vecSlot = sample->add_vector_slots();
        auto values = vecSlot->mutable_values();
        values->Reserve(dim);
        for (int64_t i = 0; i < dim; ++i) {
          values->AddAlreadyReserved(
              static_cast<float>(arg.value->getElement(pos, i)));
        }
        break;
      }
      case SlotDef::INDEX: {
        sample->add_id_slots(arg.ids->get(pos));
        break;
      }
      case SlotDef::VECTOR_SPARSE_NON_VALUE: {
        VectorSlot* vecSlot = sample->add_vector_slots();
        auto ids = vecSlot->mutable_ids();
        // Initialised because getColRow may return early on a failed
        // assertion; the loop below must then see an empty row.
        int colNum = 0;
        const int* rowCols = nullptr;
        const real* rowValues = nullptr;  // stays nullptr for this slot type
        getColRow(arg, pos, useGpu, &colNum, &rowCols, &rowValues);
        ids->Reserve(colNum);
        for (int i = 0; i < colNum; ++i) {
          ids->AddAlreadyReserved(rowCols[i]);
        }
        SubseqSlot* subseqSlot = sample->add_subseq_slots();  // subseq
        subseqSlot->set_slot_id(slotid);
        auto lens = subseqSlot->mutable_lens();
        lens->Add(colNum);
        break;
      }
      case SlotDef::VECTOR_SPARSE_VALUE: {
        VectorSlot* vecSlot = sample->add_vector_slots();
        auto values = vecSlot->mutable_values();
        auto ids = vecSlot->mutable_ids();
        // Same defensive initialisation as in the non-value case.
        int colNum = 0;
        const int* rowCols = nullptr;
        const real* rowValues = nullptr;
        getColRow(arg, pos, useGpu, &colNum, &rowCols, &rowValues);
        ids->Reserve(colNum);
        values->Reserve(colNum);
        for (int i = 0; i < colNum; ++i) {
          ids->AddAlreadyReserved(rowCols[i]);
          values->AddAlreadyReserved(rowValues[i]);
        }
        break;
      }
      case SlotDef::VAR_MDIM_DENSE:
      case SlotDef::VAR_MDIM_INDEX: {
        LOG(FATAL) << "Not implemented";
        break;
      }
      case SlotDef::STRING: {
        VectorSlot* vecSlot = sample->add_vector_slots();
        vecSlot->add_strs((*arg.strs)[pos]);
        break;
      }
    }
    slotid++;
  }
}
// Write `batch` to the proto test files, one header plus samples per file.
//
// batch            source data; must contain at least one slot.
// useGpu           forwarded to makeSample.
// dataCompression  selects protoFilesCompressed instead of protoFiles and is
//                  passed to ProtoWriter.
//
// The batch's sequences are split across the files as evenly as integer
// division allows. When the first slot has sequence start positions, every
// sample of a sequence is written and the first one is flagged as the
// beginning; otherwise each sequence index is treated as a single sample.
// Finally the number of written samples is checked against the batch size of
// the first slot.
void writeData(const DataBatch& batch, bool useGpu, bool dataCompression) {
  DataHeader header;
  const vector<Argument>& arguments = batch.getStreams();
  // arguments[0] is read below; fail loudly instead of indexing an empty
  // vector.
  CHECK(!arguments.empty()) << "batch has no slots";
  for (auto& argument : arguments) {
    SlotDef* slotDef = header.add_slot_defs();
    slotDef->set_type(getSlotType(argument));
    slotDef->set_dim(getSlotDim(argument));
  }
  VLOG(1) << "header=" << header.DebugString();

  const int64_t totalSeqs = batch.getNumSequences();
  int64_t seq = 0;
  ICpuGpuVectorPtr sequenceStartPositions = arguments[0].sequenceStartPositions;
  int64_t numWritten = 0;
  // Bind by reference: the file lists are only read here.
  const vector<string>& curProtoFiles =
      dataCompression ? protoFilesCompressed : protoFiles;
  const int64_t numFiles = static_cast<int64_t>(curProtoFiles.size());
  for (int64_t i = 0; i < numFiles; ++i) {
    const int64_t numSeqs =
        totalSeqs * (i + 1) / numFiles - totalSeqs * i / numFiles;
    // The payload is serialised protobuf data, so open the stream in binary
    // mode to rule out any newline translation.
    ofstream os(curProtoFiles[i], std::ios::out | std::ios::binary);
    CHECK(os) << "Fail to open " << curProtoFiles[i];
    unique_ptr<ProtoWriter> writer(new ProtoWriter(&os, dataCompression));
    CHECK(writer->write(header));
    for (int64_t j = 0; j < numSeqs; ++j, ++seq) {
      int64_t begin = seq;
      int64_t end = seq + 1;
      if (sequenceStartPositions) {
        begin = sequenceStartPositions->getElement(seq);
        end = sequenceStartPositions->getElement(seq + 1);
      }
      for (int64_t pos = begin; pos < end; ++pos) {
        DataSample sample;
        makeSample(arguments, pos, pos == begin, &sample, useGpu);
        CHECK(writer->write(sample));
        ++numWritten;
      }
    }
    // Destroy the writer before closing the stream it writes to
    // (presumably so that any buffered output reaches the file first).
    writer.reset(nullptr);
    os.close();
  }
  CHECK_EQ(arguments[0].getBatchSize(), numWritten);
}
// check that the sample at pos1 in args1 is same as the sample at pos2 in args2
void
checkSample
(
const
vector
<
Argument
>&
args1
,
int64_t
pos1
,
const
vector
<
Argument
>&
args2
,
int64_t
pos2
,
bool
useGpu
)
{
EXPECT_EQ
(
args1
.
size
(),
args2
.
size
());
VLOG
(
1
)
<<
" pos1="
<<
pos1
<<
" pos2="
<<
pos2
;
for
(
size_t
i
=
0
;
i
<
args1
.
size
();
++
i
)
{
auto
type
=
getSlotType
(
args1
[
i
]);
int
dim
=
getSlotDim
(
args1
[
i
]);
EXPECT_EQ
(
type
,
getSlotType
(
args2
[
i
]));
if
(
type
==
SlotDef
::
INDEX
)
{
EXPECT_GE
(
dim
,
getSlotDim
(
args2
[
i
]));
}
else
{
EXPECT_EQ
(
dim
,
getSlotDim
(
args2
[
i
]));
}
switch
(
type
)
{
case
SlotDef
::
VECTOR_DENSE
:
{
for
(
int
j
=
0
;
j
<
dim
;
++
j
)
{
EXPECT_EQ
(
static_cast
<
float
>
(
args1
[
i
].
value
->
getElement
(
pos1
,
j
)),
static_cast
<
float
>
(
args2
[
i
].
value
->
getElement
(
pos2
,
j
)));
}
break
;
}
case
SlotDef
::
INDEX
:
{
EXPECT_EQ
(
args1
[
i
].
ids
->
get
(
pos1
),
args2
[
i
].
ids
->
get
(
pos2
));
break
;
}
case
SlotDef
::
VECTOR_SPARSE_NON_VALUE
:
case
SlotDef
::
VECTOR_SPARSE_VALUE
:
{
int
colNum1
,
colNum2
;
const
int
*
rowCols1
,
*
rowCols2
;
const
real
*
rowValues1
,
*
rowValues2
;
getColRow
(
args1
[
i
],
pos1
,
useGpu
,
&
colNum1
,
&
rowCols1
,
&
rowValues1
);
getColRow
(
args2
[
i
],
pos2
,
useGpu
,
&
colNum2
,
&
rowCols2
,
&
rowValues2
);
EXPECT_EQ
(
colNum1
,
colNum2
);
for
(
int
j
=
0
;
j
<
colNum1
;
++
j
)
{
EXPECT_EQ
(
rowCols1
[
j
],
rowCols2
[
j
]);
if
(
type
==
SlotDef
::
VECTOR_SPARSE_VALUE
)
{
EXPECT_EQ
(
rowValues1
[
j
],
rowValues2
[
j
]);
}
}
break
;
}
case
SlotDef
::
VAR_MDIM_DENSE
:
case
SlotDef
::
VAR_MDIM_INDEX
:
{
LOG
(
FATAL
)
<<
"Not implemented"
;
break
;
}
case
SlotDef
::
STRING
:
{
EXPECT_EQ
((
*
args1
[
i
].
strs
)[
pos1
],
(
*
args2
[
i
].
strs
)[
pos2
]);
break
;
}
}
}
}
void
testProtoDataProvider
(
int
*
numPerSlotType
,
bool
iid
,
bool
async
,
bool
useGpu
,
bool
dataCompression
,
int
numConstantSlots
=
0
)
{
mkDir
(
kTestDir
);
DataBatch
data
;
prepareData
(
&
data
,
numPerSlotType
,
iid
,
useGpu
);
writeData
(
data
,
useGpu
,
dataCompression
);
DataConfig
config
;
config
.
set_type
(
"proto"
);
config
.
set_files
(
dataCompression
?
kProtoFileListCompressed
:
kProtoFileList
);
config
.
set_async_load_data
(
async
);
for
(
int
i
=
0
;
i
<
numConstantSlots
;
++
i
)
{
config
.
add_constant_slots
(
i
+
11
);
MatrixPtr
w
=
Matrix
::
create
(
data
.
getSize
(),
1
,
/* trans= */
false
,
/* useGpu= */
false
);
w
->
assign
(
config
.
constant_slots
(
i
));
data
.
appendData
(
w
);
}
unique_ptr
<
DataProvider
>
dataProvider
(
DataProvider
::
create
(
config
,
useGpu
));
dataProvider
->
setSkipShuffle
();
EXPECT_EQ
(
data
.
getSize
(),
dataProvider
->
getSize
());
int64_t
batchSize
=
10
;
DataBatch
batch
;
size_t
seq1
=
0
;
vector
<
Argument
>&
args1
=
data
.
getStreams
();
ICpuGpuVectorPtr
sequenceStartPositions1
=
args1
[
0
].
sequenceStartPositions
;
dataProvider
->
reset
();
while
(
dataProvider
->
getNextBatch
(
batchSize
,
&
batch
)
>
0
)
{
CHECK_EQ
(
data
.
getNumStreams
(),
batch
.
getNumStreams
());
vector
<
Argument
>&
args2
=
batch
.
getStreams
();
ICpuGpuVectorPtr
sequenceStartPositions2
=
args2
[
0
].
sequenceStartPositions
;
for
(
auto
&
arg
:
args2
)
{
EXPECT_EQ
(
iid
,
!
arg
.
sequenceStartPositions
);
}
size_t
numSeqs
=
batch
.
getNumSequences
();
VLOG
(
1
)
<<
"numSeqs="
<<
numSeqs
;
for
(
size_t
seq2
=
0
;
seq2
<
numSeqs
;
++
seq1
,
++
seq2
)
{
int64_t
begin1
=
seq1
;
int64_t
end1
=
seq1
+
1
;
if
(
sequenceStartPositions1
)
{
begin1
=
sequenceStartPositions1
->
getElement
(
seq1
);
end1
=
sequenceStartPositions1
->
getElement
(
seq1
+
1
);
EXPECT_LT
(
seq1
,
sequenceStartPositions1
->
getSize
()
-
1
);
}
int64_t
begin2
=
seq2
;
int64_t
end2
=
seq2
+
1
;
if
(
sequenceStartPositions2
)
{
begin2
=
sequenceStartPositions2
->
getElement
(
seq2
);
end2
=
sequenceStartPositions2
->
getElement
(
seq2
+
1
);
}
VLOG
(
1
)
<<
" begin1="
<<
begin1
<<
" end1="
<<
end1
<<
" begin2="
<<
begin2
<<
" end2="
<<
end2
;
EXPECT_EQ
(
end1
-
begin1
,
end2
-
begin2
);
for
(
int
i
=
0
;
i
<
end1
-
begin1
;
++
i
)
{
checkSample
(
args1
,
begin1
+
i
,
args2
,
begin2
+
i
,
useGpu
);
}
}
}
EXPECT_EQ
(
seq1
,
(
size_t
)
data
.
getNumSequences
());
rmDir
(
kTestDir
);
}
// Run testProtoDataProvider for every combination of slot counts (0 or 3 for
// each of the five slot kinds) crossed with iid / async / useGpu /
// dataCompression. Combinations without any slot, async together with GPU,
// and GPU runs in builds without PADDLE_WITH_CUDA are skipped.
TEST(ProtoDataProvider, test) {
  const int slotCounts[] = {0, 3};
  const int flags[] = {0, 1};
  const int numChoices = sizeof(slotCounts) / sizeof(slotCounts[0]);
  const int numSlot = 5;
  // combination[i] is the index into slotCounts chosen for slot kind i; the
  // array is advanced like an odometer whose last digit moves fastest.
  int combination[numSlot] = {0};

  bool more = true;
  while (more) {
    // Read the current combination before advancing to the next one.
    const int numDenseVecSlots = slotCounts[combination[0]];
    const int numSparseNonValueVecSlots = slotCounts[combination[1]];
    const int numSparseValueVectorSlots = slotCounts[combination[2]];
    const int numStrSlots = slotCounts[combination[3]];
    const int numIdSlots = slotCounts[combination[4]];

    // Advance the odometer; running off the front means this was the last
    // combination.
    int digit = numSlot - 1;
    for (; digit >= 0; --digit) {
      if (combination[digit] < numChoices - 1) {
        ++combination[digit];
        break;
      }
      combination[digit] = 0;
    }
    more = digit >= 0;

    const int totalSlots = numDenseVecSlots + numSparseNonValueVecSlots +
                           numSparseValueVectorSlots + numStrSlots +
                           numIdSlots;
    if (totalSlots < 1) continue;

    for (int iid : flags) {
      for (int async : flags) {
        for (int useGpu : flags) {
          // Currently in async mode, useGpu is not supported
          if (async && useGpu) continue;
#ifndef PADDLE_WITH_CUDA
          if (useGpu) continue;
#endif
          for (int dataCompression : flags) {
            LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots
                      << " numSparseNonValueVecSlots="
                      << numSparseNonValueVecSlots
                      << " numSparseValueVectorSlots="
                      << numSparseValueVectorSlots
                      << " numStrSlots=" << numStrSlots
                      << " numIdSlots=" << numIdSlots << " iid=" << iid
                      << " async=" << async << " useGpu=" << useGpu
                      << " dataCompression=" << dataCompression;

            int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0};
            numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots;
            numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] =
                numSparseNonValueVecSlots;
            numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE] =
                numSparseValueVectorSlots;
            numPerSlotType[SlotDef::INDEX] = numIdSlots;
            numPerSlotType[SlotDef::STRING] = numStrSlots;
            testProtoDataProvider(
                numPerSlotType, iid, async, useGpu, dataCompression);
          }
        }
      }
    }
  }
}
// Run testProtoDataProvider with one or two constant slots added to the
// config. Dense and sparse non-value slot counts vary over {0, 3} (at least
// one of them must be non-zero); one sparse value slot and one index slot are
// always present. Data is iid and loaded synchronously. GPU runs are skipped
// in builds without PADDLE_WITH_CUDA.
TEST(ProtoDataProvider, constant_slots) {
  int numSlotsArray[] = {0, 3};
  int numTwoArray[] = {0, 1};
  for (int numDenseVecSlots : numSlotsArray) {
    for (int numSparseNonValueVecSlots : numSlotsArray) {
      if (numDenseVecSlots + numSparseNonValueVecSlots < 1) continue;
      for (int numConstantSlots : {1, 2}) {
        for (int useGpu : numTwoArray) {
          for (int dataCompression : numTwoArray) {
#ifndef PADDLE_WITH_CUDA
            if (useGpu) {
              continue;
            }
#endif
            // Label fixed: it used to read "numConstantSlogs=".
            LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots
                      << " numSparseNonValueVecSlots="
                      << numSparseNonValueVecSlots
                      << " numConstantSlots=" << numConstantSlots
                      << " useGpu=" << useGpu
                      << " dataCompression=" << dataCompression;

            int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0};
            numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots;
            numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] =
                numSparseNonValueVecSlots;
            numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE] = 1;
            numPerSlotType[SlotDef::INDEX] = 1;
            testProtoDataProvider(numPerSlotType,
                                  /* iid= */ true,
                                  /* async= */ false,
                                  useGpu,
                                  dataCompression,
                                  numConstantSlots);
          }  // end for (int dataCompression : numTwoArray)
        }    // end for (int useGpu : numTwoArray)
      }      // end for (int numConstantSlots : {1, 2})
    }        // end for (int numSparseNonValueVecSlots : numSlotsArray)
  }          // end for (int numDenseVecSlots : numSlotsArray)
}
void
checkSampleSequence
(
const
vector
<
Argument
>&
args1
,
const
vector
<
Argument
>&
args2
,
int64_t
offset
,
int64_t
numSeqs
,
bool
useGpu
)
{
// check slot num are equal
EXPECT_EQ
(
args1
.
size
(),
args2
.
size
());
for
(
size_t
i
=
0
;
i
<
args1
.
size
();
i
++
)
{
auto
type
=
getSlotType
(
args1
[
i
]);
// check for args2: sequenceStartPositions vs numSeqs
// (1) size
EXPECT_EQ
(
args2
[
i
].
sequenceStartPositions
->
getSize
(),
(
size_t
)
numSeqs
+
1
);
// (2) content
auto
checkArgContent
=
[
&
](
const
Argument
&
args
,
int
numSeqs
)
{
for
(
int
j
=
0
;
j
<=
numSeqs
;
j
++
)
{
int
start_pos
=
args
.
sequenceStartPositions
->
getElement
(
j
);
EXPECT_EQ
(
start_pos
,
j
);
}
};
switch
(
type
)
{
case
SlotDef
::
INDEX
:
{
// args1: for label
checkArgContent
(
args2
[
i
],
numSeqs
);
// check for args2: ids are equal to args1[offset]
// (1) size
EXPECT_EQ
(
args2
[
i
].
ids
->
getSize
(),
(
size_t
)
numSeqs
);
// (2) content
for
(
int
j
=
0
;
j
<
numSeqs
;
j
++
)
{
EXPECT_EQ
(
args2
[
i
].
ids
->
get
(
j
),
args1
[
i
].
ids
->
get
(
offset
+
j
));
}
break
;
}
case
SlotDef
::
VECTOR_SPARSE_NON_VALUE
:
{
// args1: for sparse_non_value
// args2 should put sparse indexes in ids
int
colNum1
;
const
int
*
rowCols1
;
const
real
*
rowValues1
;
// nullptr
int
totalLength
=
0
;
for
(
int
j
=
0
;
j
<
numSeqs
;
j
++
)
{
getColRow
(
args1
[
i
],
offset
+
j
,
useGpu
,
&
colNum1
,
&
rowCols1
,
&
rowValues1
);
// (1) lengths
EXPECT_EQ
(
totalLength
,
args2
[
i
].
sequenceStartPositions
->
getElement
(
j
));
EXPECT_EQ
(
totalLength
,
args2
[
i
].
subSequenceStartPositions
->
getElement
(
j
));
// (2) content
for
(
int
k
=
0
;
k
<
colNum1
;
k
++
)
{
EXPECT_EQ
(
rowCols1
[
k
],
args2
[
i
].
ids
->
get
(
totalLength
+
k
));
}
totalLength
+=
colNum1
;
if
(
colNum1
==
0
)
{
// special case here: we will put a "-1" into ids when column num is
// zero. see ProtoSequenceDataProvider::getNextBatchInternal.
EXPECT_EQ
(
-
1
,
args2
[
i
].
ids
->
get
(
totalLength
));
totalLength
++
;
}
}
EXPECT_EQ
(
totalLength
,
args2
[
i
].
sequenceStartPositions
->
getElement
(
numSeqs
));
EXPECT_EQ
(
totalLength
,
args2
[
i
].
subSequenceStartPositions
->
getElement
(
numSeqs
));
break
;
}
case
SlotDef
::
VECTOR_DENSE
:
{
// args1: for dense vector
checkArgContent
(
args2
[
i
],
numSeqs
);
// check for args2: values are equal to args1[offset]
// (1) size
EXPECT_EQ
(
args2
[
i
].
value
->
getHeight
(),
(
size_t
)
numSeqs
);
EXPECT_EQ
(
args2
[
i
].
value
->
getWidth
(),
(
size_t
)
getSlotDim
(
args1
[
i
]));
// (2) content
for
(
int
j
=
0
;
j
<
numSeqs
;
j
++
)
{
for
(
size_t
k
=
0
;
k
<
args2
[
i
].
value
->
getWidth
();
k
++
)
{
EXPECT_EQ
(
static_cast
<
float
>
(
args1
[
i
].
value
->
getElement
(
j
+
offset
,
k
)),
static_cast
<
float
>
(
args2
[
i
].
value
->
getElement
(
j
,
k
)));
}
}
break
;
}
default:
{
EXPECT_EQ
(
true
,
false
)
<<
"should not reach here"
;
}
}
}
}
void
testProtoSequenceDataProvider
(
int
*
numPerSlotType
,
bool
async
,
bool
useGpu
)
{
mkDir
(
kTestDir
);
DataBatch
data
;
prepareData
(
&
data
,
numPerSlotType
,
/* iid */
true
,
useGpu
);
writeData
(
data
,
useGpu
,
/* dataCompression */
false
);
DataConfig
config
;
config
.
set_type
(
"proto_sequence"
);
config
.
set_files
(
kProtoFileList
);
config
.
set_async_load_data
(
async
);
unique_ptr
<
DataProvider
>
dataProvider
(
DataProvider
::
create
(
config
,
useGpu
));
dataProvider
->
setSkipShuffle
();
EXPECT_EQ
(
data
.
getSize
(),
dataProvider
->
getSize
());
int64_t
batchSize
=
10
;
DataBatch
batch
;
vector
<
Argument
>&
args1
=
data
.
getStreams
();
ICpuGpuVectorPtr
sequenceStartPositions1
=
args1
[
0
].
sequenceStartPositions
;
dataProvider
->
reset
();
size_t
args1Offset
=
0
;
while
(
dataProvider
->
getNextBatch
(
batchSize
,
&
batch
)
>
0
)
{
CHECK_EQ
(
data
.
getNumStreams
(),
batch
.
getNumStreams
());
vector
<
Argument
>&
args2
=
batch
.
getStreams
();
ICpuGpuVectorPtr
sequenceStartPositions2
=
args2
[
0
].
sequenceStartPositions
;
for
(
auto
&
arg
:
args1
)
{
// args1 should not has sequence
EXPECT_EQ
(
true
,
!
arg
.
sequenceStartPositions
);
}
for
(
auto
&
arg
:
args2
)
{
// args2 should has sequence
EXPECT_NE
(
true
,
!
arg
.
sequenceStartPositions
);
}
size_t
numSeqs
=
batch
.
getNumSequences
();
checkSampleSequence
(
args1
,
args2
,
args1Offset
,
numSeqs
,
useGpu
);
args1Offset
+=
numSeqs
;
}
EXPECT_EQ
(
args1Offset
,
(
size_t
)
data
.
getNumSequences
());
rmDir
(
kTestDir
);
}
// Run testProtoSequenceDataProvider for every combination of sparse
// non-value, index and dense slot counts (0 or 3 each, at least one slot in
// total) crossed with async / useGpu. Async together with GPU and GPU runs in
// builds without PADDLE_WITH_CUDA are skipped.
TEST(ProtoSequenceDataProvider, test) {
  const int slotCounts[] = {0, 3};
  const int flags[] = {0, 1};

  for (int numSparseNonValueVecSlots : slotCounts) {
    for (int numIdSlots : slotCounts) {
      for (int numDenseVecSlots : slotCounts) {
        const int totalSlots =
            numDenseVecSlots + numSparseNonValueVecSlots + numIdSlots;
        if (totalSlots < 1) continue;

        for (int async : flags) {
          for (int useGpu : flags) {
            // Currently in async mode, useGpu is not supported
            if (async && useGpu) continue;
#ifndef PADDLE_WITH_CUDA
            if (useGpu) continue;
#endif
            LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots
                      << " numSparseNonValueVecSlots="
                      << numSparseNonValueVecSlots
                      << " numIdSlots=" << numIdSlots << " async=" << async
                      << " useGpu=" << useGpu;

            int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0};
            numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots;
            numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] =
                numSparseNonValueVecSlots;
            numPerSlotType[SlotDef::INDEX] = numIdSlots;
            testProtoSequenceDataProvider(numPerSlotType, async, useGpu);
          }
        }
      }
    }
  }
}
paddle/math/Storage.cpp
浏览文件 @
5ee63bb6
...
...
@@ -17,9 +17,13 @@ limitations under the License. */
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#ifndef PADDLE_MOBILE_INFERENCE
DEFINE_int32
(
pool_limit_size
,
536870912
,
"maximum memory size managed by a memory pool, default is 512M"
);
#else
DEFINE_int32
(
pool_limit_size
,
0
,
"default is 0"
);
#endif
namespace
paddle
{
...
...
paddle/operators/CMakeLists.txt
浏览文件 @
5ee63bb6
...
...
@@ -61,6 +61,18 @@ function(op_library TARGET)
set
(
pybind_flag 1
)
endif
()
if
(
"
${
TARGET
}
"
STREQUAL
"compare_op"
)
set
(
pybind_flag 1
)
file
(
APPEND
${
pybind_file
}
"USE_OP(less_than);
\n
USE_OP(equal);
\n
"
)
endif
()
# conv_op contains several operators
if
(
"
${
TARGET
}
"
STREQUAL
"conv_op"
)
set
(
pybind_flag 1
)
# It's enough to just adding one operator to pybind
file
(
APPEND
${
pybind_file
}
"USE_OP(conv2d);
\n
"
)
endif
()
# pool_op contains several operators
if
(
"
${
TARGET
}
"
STREQUAL
"pool_op"
)
set
(
pybind_flag 1
)
...
...
@@ -68,9 +80,11 @@ function(op_library TARGET)
file
(
APPEND
${
pybind_file
}
"USE_OP(pool2d);
\n
"
)
endif
()
if
(
"
${
TARGET
}
"
STREQUAL
"compare_op"
)
# pool_cudnn_op contains several operators
if
(
"
${
TARGET
}
"
STREQUAL
"pool_cudnn_op"
)
set
(
pybind_flag 1
)
file
(
APPEND
${
pybind_file
}
"USE_OP(less_than);
\n
USE_OP(equal);
\n
"
)
# It's enough to just adding one operator to pybind
file
(
APPEND
${
pybind_file
}
"USE_OP(pool2d_cudnn);
\n
"
)
endif
()
# pool_with_index_op contains several operators
...
...
@@ -80,25 +94,18 @@ function(op_library TARGET)
file
(
APPEND
${
pybind_file
}
"USE_OP(max_pool2d_with_index);
\n
"
)
endif
()
# conv_op contains several operators
if
(
"
${
TARGET
}
"
STREQUAL
"conv_op"
)
set
(
pybind_flag 1
)
# It's enough to just adding one operator to pybind
file
(
APPEND
${
pybind_file
}
"USE_OP(conv2d);
\n
"
)
endif
()
# conv_transpose_op contains several operators
if
(
"
${
TARGET
}
"
STREQUAL
"conv_transpose_op"
)
set
(
pybind_flag 1
)
# It's enough to just adding one operator to pybind
file
(
APPEND
${
pybind_file
}
"USE_OP(conv2d_transpose);
\n
"
)
endif
()
#
pool_cudnn_op contains several
operators
if
(
"
${
TARGET
}
"
STREQUAL
"
pool
_cudnn_op"
)
#
conv_transpose_cudnn_op contains two
operators
if
(
"
${
TARGET
}
"
STREQUAL
"
conv_transpose
_cudnn_op"
)
set
(
pybind_flag 1
)
# It's enough to just adding one operator to pybind
file
(
APPEND
${
pybind_file
}
"USE_OP(
pool2d
_cudnn);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP(
conv2d_transpose
_cudnn);
\n
"
)
endif
()
# save_restore_op contains several operators
...
...
paddle/operators/conv_cudnn_op.cu.cc
浏览文件 @
5ee63bb6
...
...
@@ -226,9 +226,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
T
alpha
=
1.0
f
,
beta
=
0.0
f
;
if
(
input_grad
)
{
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
input_grad
);
t
.
device
(
ctx
.
GetEigenDevice
<
platform
::
GPUPlace
>
())
=
t
.
constant
(
static_cast
<
T
>
(
0
));
// Because beta is zero, it is unnecessary to reset input_grad.
for
(
int
i
=
0
;
i
<
groups
;
i
++
)
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnConvolutionBackwardData
(
handle
,
&
alpha
,
cudnn_filter_desc
,
...
...
@@ -241,9 +240,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv backward filter ---------------------
if
(
filter_grad
)
{
T
*
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
filter_grad
);
t
.
device
(
ctx
.
GetEigenDevice
<
platform
::
GPUPlace
>
())
=
t
.
constant
(
static_cast
<
T
>
(
0
));
// Because beta is zero, it is unnecessary to reset filter_grad.
for
(
int
i
=
0
;
i
<
groups
;
i
++
)
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnConvolutionBackwardFilter
(
handle
,
&
alpha
,
cudnn_input_desc
,
input_data
+
i
*
group_offset_in
,
...
...
paddle/operators/conv_op.cc
浏览文件 @
5ee63bb6
...
...
@@ -225,11 +225,15 @@ REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad,
ops
::
ConvOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv2d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
conv2d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
conv3d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/conv_op.cu.cc
浏览文件 @
5ee63bb6
...
...
@@ -17,11 +17,15 @@
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
conv2d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv2d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
conv2d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
conv3d_grad
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
paddle/operators/conv
2d
_transpose_cudnn_op.cc
→
paddle/operators/conv_transpose_cudnn_op.cc
浏览文件 @
5ee63bb6
...
...
@@ -23,7 +23,24 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
framework
::
OpAttrChecker
*
op_checker
)
:
Conv2DTransposeOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"dilations of convolution operator."
)
.
SetDefault
(
std
::
vector
<
int
>
{
1
,
1
});
.
SetDefault
({
1
,
1
});
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted."
)
.
SetDefault
(
4096
);
}
};
class
CudnnConv3DTransposeOpMaker
:
public
Conv3DTransposeOpMaker
{
public:
CudnnConv3DTransposeOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
Conv3DTransposeOpMaker
(
proto
,
op_checker
)
{
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"dilations of convolution operator."
)
.
SetDefault
({
1
,
1
,
1
});
AddAttr
<
int
>
(
"workspace_size_MB"
,
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
...
...
@@ -48,3 +65,14 @@ REGISTER_OP_CPU_KERNEL(
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP
(
conv3d_transpose_cudnn
,
ops
::
ConvTransposeOp
,
ops
::
CudnnConv3DTransposeOpMaker
,
conv3d_transpose_cudnn_grad
,
ops
::
ConvTransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/conv
2d
_transpose_cudnn_op.cu.cc
→
paddle/operators/conv_transpose_cudnn_op.cu.cc
浏览文件 @
5ee63bb6
...
...
@@ -54,15 +54,21 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor
output_desc
;
ScopedFilterDescriptor
filter_desc
;
ScopedConvolutionDescriptor
conv_desc
;
DataLayout
layout
=
DataLayout
::
kNCHW
;
DataLayout
layout
;
if
(
strides
.
size
()
==
2U
)
{
layout
=
DataLayout
::
kNCHW
;
}
else
{
layout
=
DataLayout
::
kNCDHW
;
}
//
N, M, H, W
//
(N, M, H, W) or (N, M, D, H, W)
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
//
N, C, O_h, O_w
//
(N, C, O_h, O_w) or (N, C, O_d, O_h, O_w)
cudnnTensorDescriptor_t
cudnn_output_desc
=
output_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
output
->
dims
()));
//
M, C, K_h, K_w
//
(M, C, K_h, K_w) or (M, C, K_d, K_h, K_w)
cudnnFilterDescriptor_t
cudnn_filter_desc
=
filter_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
filter
->
dims
()));
cudnnConvolutionDescriptor_t
cudnn_conv_desc
=
...
...
@@ -136,13 +142,13 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
ScopedConvolutionDescriptor
conv_desc
;
DataLayout
layout
=
DataLayout
::
kNCHW
;
// Input: (N, M, H, W)
// Input: (N, M, H, W)
or (N, M, D, H, W)
cudnnTensorDescriptor_t
cudnn_input_desc
=
input_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
input
->
dims
()));
// Output: (N, C, O_
H, O_W
)
// Output: (N, C, O_
h, O_w) or (N, C, O_d, O_h, O_w
)
cudnnTensorDescriptor_t
cudnn_output_desc
=
output_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
output_grad
->
dims
()));
// Filter (M, C, K_
H, K_W
)
// Filter (M, C, K_
h, K_w) or (M, C, K_d K_h, K_w
)
cudnnFilterDescriptor_t
cudnn_filter_desc
=
filter_desc
.
descriptor
<
T
>
(
layout
,
framework
::
vectorize2int
(
filter
->
dims
()));
...
...
@@ -200,8 +206,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
T
alpha
=
1.0
f
,
beta
=
0.0
f
;
if
(
input_grad
)
{
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
set_constant
(
ctx
.
device_context
(),
input_grad
,
0
);
// Because beta is zero, it is unnecessary to reset input_grad.
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnConvolutionForward
(
handle
,
&
alpha
,
cudnn_output_desc
,
output_grad_data
,
cudnn_filter_desc
,
filter_data
,
cudnn_conv_desc
,
data_algo
,
...
...
@@ -212,8 +217,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv backward filter ---------------------
if
(
filter_grad
)
{
T
*
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
set_constant
(
ctx
.
device_context
(),
filter_grad
,
0
);
// Because beta is zero, it is unnecessary to reset filter_grad.
// Gradient with respect to the filter
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnConvolutionBackwardFilter
(
handle
,
&
alpha
,
cudnn_output_desc
,
output_grad_data
,
cudnn_input_desc
,
...
...
@@ -234,3 +238,8 @@ REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn,
ops
::
CudnnConvTransposeOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_cudnn_grad
,
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn
,
ops
::
CudnnConvTransposeOpKernel
<
float
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_cudnn_grad
,
ops
::
CudnnConvTransposeGradOpKernel
<
float
>
);
paddle/operators/conv_transpose_op.cc
浏览文件 @
5ee63bb6
...
...
@@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
for
(
size_t
i
=
0
;
i
<
paddings
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
paddings
[
i
],
0
,
"No Padding allowed in conv transpose op."
);
}
PADDLE_ENFORCE
(
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
,
"ConvTransposeOp intput should be 4-D or 5-D tensor."
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
...
...
@@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
1
]});
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
+
output_shape
.
push_back
((
in_dims
[
i
+
2
]
-
1
)
*
strides
[
i
]
-
2
*
paddings
[
i
]
+
filter_dims
[
i
+
2
]);
}
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
...
...
@@ -190,17 +185,21 @@ REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker,
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv2d_transpose_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP
(
conv3d_transpose
,
ops
::
ConvTransposeOp
,
ops
::
Conv3DTransposeOpMaker
,
conv3d_transpose_grad
,
ops
::
ConvTransposeOpGrad
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
conv3d_transpose_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/conv_transpose_op.cu.cc
浏览文件 @
5ee63bb6
...
...
@@ -18,14 +18,18 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv2d_transpose_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvTransposeKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
REGISTER_OP_GPU_KERNEL
(
conv3d_transpose_grad
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
GemmConvTransposeGradKernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
paddle/operators/conv_transpose_op.h
浏览文件 @
5ee63bb6
...
...
@@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
Tensor
*
output
=
context
.
Output
<
Tensor
>
(
"Output"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will alway be disabled in conv2dtranspose.
...
...
@@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
std
::
vector
<
int
>
{
0
,
0
,
0
},
&
output_batch
);
col2vol
(
context
.
device_context
(),
col
,
dilations
,
strides
,
paddings
,
&
output_batch
);
}
}
}
...
...
@@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
if
((
!
input_grad
)
&&
(
!
filter_grad
))
return
;
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
// Actually, no paddings and groups allowed in conv transpose.
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
paddle/operators/gru_op.h
浏览文件 @
5ee63bb6
...
...
@@ -24,8 +24,17 @@
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
Place
,
typename
T
>
inline
void
ReorderInitState
(
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Tensor
&
src
,
const
size_t
*
index
,
framework
::
Tensor
*
dst
,
bool
indexed_src
)
{
math
::
CopyMatrixRowsFunctor
<
Place
,
T
>
row_shuffle
;
dst
->
mutable_data
<
T
>
(
src
.
dims
(),
ctx
.
GetPlace
());
row_shuffle
(
ctx
,
src
,
index
,
*
dst
,
indexed_src
);
}
template
<
typename
Place
,
typename
T
>
class
GRUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
...
@@ -33,7 +42,6 @@ class GRUKernel : public framework::OpKernel<T> {
void
BatchCompute
(
const
framework
::
ExecutionContext
&
context
)
const
{
auto
*
input
=
context
.
Input
<
LoDTensor
>
(
"Input"
);
auto
*
h0
=
context
.
Input
<
Tensor
>
(
"H0"
);
const
T
*
h0_data
=
h0
?
h0
->
data
<
T
>
()
:
nullptr
;
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"Weight"
);
const
T
*
weight_data
=
weight
->
data
<
T
>
();
auto
*
bias
=
context
.
Input
<
Tensor
>
(
"Bias"
);
...
...
@@ -66,7 +74,18 @@ class GRUKernel : public framework::OpKernel<T> {
gru_value
.
gateWeight
=
const_cast
<
T
*>
(
weight_data
);
gru_value
.
stateWeight
=
const_cast
<
T
*>
(
weight_data
+
2
*
frame_size
*
frame_size
);
gru_value
.
prevOutValue
=
const_cast
<
T
*>
(
h0_data
);
Tensor
ordered_h0
;
const
size_t
*
order
=
batch_gate
->
lod
()[
2
].
data
();
if
(
h0
)
{
// Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState
<
Place
,
T
>
(
context
.
device_context
(),
*
h0
,
order
,
&
ordered_h0
,
true
);
gru_value
.
prevOutValue
=
ordered_h0
.
data
<
T
>
();
}
else
{
gru_value
.
prevOutValue
=
nullptr
;
}
auto
batch_starts
=
batch_gate
->
lod
()[
0
];
size_t
num_batch
=
batch_starts
.
size
()
-
1
;
for
(
size_t
n
=
0
;
n
<
num_batch
;
n
++
)
{
...
...
@@ -102,7 +121,6 @@ class GRUGradKernel : public framework::OpKernel<T> {
public:
void
BatchCompute
(
const
framework
::
ExecutionContext
&
context
)
const
{
auto
*
h0
=
context
.
Input
<
Tensor
>
(
"H0"
);
const
T
*
h0_data
=
h0
?
h0
->
data
<
T
>
()
:
nullptr
;
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"Weight"
);
const
T
*
weight_data
=
weight
->
data
<
T
>
();
auto
*
batch_gate
=
context
.
Input
<
LoDTensor
>
(
"BatchGate"
);
...
...
@@ -135,6 +153,17 @@ class GRUGradKernel : public framework::OpKernel<T> {
zero
(
dev_ctx
,
&
batch_gate_grad
,
static_cast
<
T
>
(
0.0
));
zero
(
dev_ctx
,
&
batch_reset_hidden_prev_grad
,
static_cast
<
T
>
(
0.0
));
Tensor
ordered_h0
,
ordered_h0_grad
;
const
size_t
*
order
=
batch_gate
->
lod
()[
2
].
data
();
if
(
h0
)
{
ReorderInitState
<
Place
,
T
>
(
context
.
device_context
(),
*
h0
,
order
,
&
ordered_h0
,
true
);
}
if
(
h0_grad
)
{
ordered_h0_grad
.
mutable_data
<
T
>
(
h0_grad
->
dims
(),
context
.
GetPlace
());
zero
(
context
.
device_context
(),
&
ordered_h0_grad
,
static_cast
<
T
>
(
0.0
));
}
bool
is_reverse
=
context
.
Attr
<
bool
>
(
"is_reverse"
);
batch_hidden_grad
.
set_lod
(
batch_hidden
->
lod
());
to_batch
(
dev_ctx
,
*
hidden_grad
,
batch_hidden_grad
,
false
,
is_reverse
);
...
...
@@ -176,14 +205,9 @@ class GRUGradKernel : public framework::OpKernel<T> {
batch_reset_hidden_prev_grad
.
Slice
(
bstart
,
bend
);
gru_grad
.
resetOutputGrad
=
reset_hidden_prev_grad_t
.
data
<
T
>
();
if
(
n
==
0
)
{
gru_value
.
prevOutValue
=
const_cast
<
T
*>
(
h0_data
);
if
(
h0_grad
)
{
T
*
h0_grad_data
=
h0_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
zero
(
dev_ctx
,
h0_grad
,
static_cast
<
T
>
(
0.0
));
gru_grad
.
prevOutGrad
=
h0_grad_data
;
}
else
{
gru_grad
.
prevOutGrad
=
nullptr
;
}
gru_value
.
prevOutValue
=
h0
?
ordered_h0
.
data
<
T
>
()
:
nullptr
;
gru_grad
.
prevOutGrad
=
h0
&&
h0_grad
?
ordered_h0_grad
.
data
<
T
>
()
:
nullptr
;
}
else
{
int
bstart_pre
=
static_cast
<
int
>
(
batch_starts
[
n
-
1
]);
Tensor
hidden_prev_t
=
batch_hidden
->
Slice
(
bstart_pre
,
bstart
);
...
...
@@ -208,6 +232,10 @@ class GRUGradKernel : public framework::OpKernel<T> {
math
::
ColwiseSum
<
Place
,
T
>
col_sum
;
col_sum
(
dev_ctx
,
batch_gate_grad
,
bias_grad
);
}
if
(
h0
&&
h0_grad
)
{
ReorderInitState
<
Place
,
T
>
(
context
.
device_context
(),
ordered_h0_grad
,
order
,
h0_grad
,
false
);
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
5ee63bb6
add_subdirectory
(
detail
)
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context
framework_proto
)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor
)
nv_library
(
selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor
)
...
...
@@ -15,7 +15,7 @@ if(WITH_GPU)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function
)
else
()
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
)
cc_library
(
math_function SRCS math_function.cc im2col.cc DEPS cblas device_context
framework_proto
)
cc_library
(
selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function
)
cc_library
(
softmax SRCS softmax.cc DEPS device_context
)
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
...
...
paddle/operators/math/im2col.cu
浏览文件 @
5ee63bb6
...
...
@@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width,
if
(
index
<
n
)
{
T
val
=
0
;
int
w
=
index
%
im_width
;
int
h
=
(
index
/
im_width
)
%
im_height
;
int
w
=
index
%
im_width
+
padding_width
;
int
h
=
(
index
/
im_width
)
%
im_height
+
padding_height
;
int
c
=
index
/
(
im_width
*
im_height
);
// compute the start and end of the output
...
...
paddle/operators/pool_cudnn_op.cu.cc
浏览文件 @
5ee63bb6
...
...
@@ -135,8 +135,7 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
if
(
input_grad
)
{
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
paddle
::
platform
::
GPUPlace
,
T
>
set_zero
;
set_zero
(
ctx
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
// Because beta is zero, it is unnecessary to reset input_grad.
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnPoolingBackward
(
handle
,
cudnn_pool_desc
,
&
alpha
,
cudnn_output_desc
,
output_data
,
...
...
paddle/parameter/ParameterUpdateFunctions.cpp
浏览文件 @
5ee63bb6
...
...
@@ -30,7 +30,7 @@ void sgdUpdateCpu(real learningRate,
const
real
*
grad
,
real
*
momentumVec
)
{
decayRate
*=
learningRate
;
#ifdef PADDLE_USE_MKL
DNN
#ifdef PADDLE_USE_MKL
ML
#pragma omp parallel for
#endif
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
...
...
paddle/platform/cudnn_helper.h
浏览文件 @
5ee63bb6
...
...
@@ -63,9 +63,10 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
} \
} while (false)
enum
class
DataLayout
{
enum
class
DataLayout
{
// Not use
kNHWC
,
kNCHW
,
kNCDHW
,
kNCHW_VECT_C
,
};
...
...
@@ -107,12 +108,15 @@ class CudnnDataType<double> {
}
};
inline
cudnnTensorFormat_t
GetCudnnTensorFormat
(
const
DataLayout
&
order
)
{
inline
cudnnTensorFormat_t
GetCudnnTensorFormat
(
const
DataLayout
&
order
)
{
// Not use
switch
(
order
)
{
case
DataLayout
::
kNHWC
:
return
CUDNN_TENSOR_NHWC
;
case
DataLayout
::
kNCHW
:
return
CUDNN_TENSOR_NCHW
;
case
DataLayout
::
kNCDHW
:
return
CUDNN_TENSOR_NCHW
;
// TODO(chengduoZH) : add CUDNN_TENSOR_NCDHW
default:
PADDLE_THROW
(
"Unknown cudnn equivalent for order"
);
}
...
...
@@ -139,7 +143,7 @@ class ScopedTensorDescriptor {
strides
[
i
]
=
dims
[
i
+
1
]
*
strides
[
i
+
1
];
}
// Update tensor descriptor dims setting if groups > 1
// FIXME(typhoonzero): Assume using NCHW order
// FIXME(typhoonzero): Assume using NCHW or
NCDHW or
der
std
::
vector
<
int
>
dims_with_group
(
dims
.
begin
(),
dims
.
end
());
// copy
if
(
groups
>
1
)
{
dims_with_group
[
1
]
=
dims_with_group
[
1
]
/
groups
;
...
...
@@ -176,9 +180,10 @@ class ScopedFilterDescriptor {
const
cudnnDataType_t
type
,
const
std
::
vector
<
int
>&
kernel
,
const
int
groups
=
1
)
{
// filter layout: MCHW, where M is the number of
// filter layout: MCHW
(MCDHW)
, where M is the number of
// output image channels, C is the number of input image channels,
// H and W is height and width of filter.
// D is the depth of the filter, H is the height of the filter, and W is the
// width of the filter.
std
::
vector
<
int
>
kernel_with_group
(
kernel
.
begin
(),
kernel
.
end
());
if
(
groups
>
1
)
{
// M /= groups
...
...
paddle/scripts/docker/README.md
浏览文件 @
5ee63bb6
...
...
@@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
|
`WITH_GPU`
| OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
|
`WITH_AVX`
| OFF | Set to "ON" to enable AVX support. |
|
`WITH_TESTING`
| ON | Build unit tests binaries. |
|
`WITH_MKLDNN`
| ON | Build with
[
Intel® MKL DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_MKLML`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
support. |
|
`WITH_MKL`
| ON | Build with
[
Intel® MKL
](
https://software.intel.com/en-us/mkl
)
and
[
Intel® MKL-DNN
](
https://github.com/01org/mkl-dnn
)
support. |
|
`WITH_GOLANG`
| ON | Build fault-tolerant parameter server written in go. |
|
`WITH_SWIG_PY`
| ON | Build with SWIG python API support. |
|
`WITH_C_API`
| OFF | Build capi libraries for inference. |
...
...
paddle/scripts/docker/build.sh
浏览文件 @
5ee63bb6
...
...
@@ -34,8 +34,7 @@ function cmake_gen() {
${
PYTHON_FLAGS
}
-DWITH_DOC=OFF
-DWITH_GPU=
${
WITH_GPU
:-
OFF
}
-DWITH_MKLDNN=
${
WITH_MKLDNN
:-
ON
}
-DWITH_MKLML=
${
WITH_MKLML
:-
ON
}
-DWITH_MKL=
${
WITH_MKL
:-
ON
}
-DWITH_AVX=
${
WITH_AVX
:-
OFF
}
-DWITH_GOLANG=
${
WITH_GOLANG
:-
ON
}
-DWITH_SWIG_PY=ON
...
...
@@ -56,8 +55,7 @@ EOF
${
PYTHON_FLAGS
}
\
-DWITH_DOC
=
OFF
\
-DWITH_GPU
=
${
WITH_GPU
:-
OFF
}
\
-DWITH_MKLDNN
=
${
WITH_MKLDNN
:-
ON
}
\
-DWITH_MKLML
=
${
WITH_MKLML
:-
ON
}
\
-DWITH_MKL
=
${
WITH_MKL
:-
ON
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
OFF
}
\
-DWITH_GOLANG
=
${
WITH_GOLANG
:-
ON
}
\
-DWITH_SWIG_PY
=
${
WITH_SWIG_PY
:-
ON
}
\
...
...
paddle/scripts/submit_local.sh.in
浏览文件 @
5ee63bb6
...
...
@@ -18,8 +18,8 @@ function version(){
echo
"PaddlePaddle @PADDLE_VERSION@, compiled with"
echo
" with_avx: @WITH_AVX@"
echo
" with_gpu: @WITH_GPU@"
echo
" with_mkl: @WITH_MKL@"
echo
" with_mkldnn: @WITH_MKLDNN@"
echo
" with_mklml: @WITH_MKLML@"
echo
" with_double: @WITH_DOUBLE@"
echo
" with_python: @WITH_PYTHON@"
echo
" with_rdma: @WITH_RDMA@"
...
...
@@ -45,8 +45,8 @@ function ver2num() {
function
cpu_config
()
{
# auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
# only when MKL
DNN or MKLML
enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
# only when MKL enabled
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
fi
ht
=
`
lscpu |grep
"per core"
|awk
-F
':'
'{print $2}'
|xargs
`
...
...
@@ -70,8 +70,8 @@ function cpu_config() {
function
threads_config
()
{
# auto set OMP_NUM_THREADS and MKL_NUM_THREADS
# according to trainer_count and total processors
# only when MKL
DNN or MKLML
enabled
if
[
"@WITH_MKL
DNN@"
==
"OFF"
]
&&
[
"@WITH_MKLML@"
==
"OFF"
]
;
then
# only when MKL enabled
if
[
"@WITH_MKL
@"
==
"OFF"
]
;
then
return
0
fi
processors
=
`
grep
"processor"
/proc/cpuinfo|sort
-u
|wc
-l
`
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
5ee63bb6
...
...
@@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
DNN
=
OFF
-DWITH_MKLML
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
paddle_docs paddle_docs_cn
...
...
paddle/trainer/Trainer.cpp
浏览文件 @
5ee63bb6
...
...
@@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
}
}
if
(
FLAGS_use_mkldnn
)
{
CHECK_EQ
(
FLAGS_trainer_count
,
1UL
)
<<
"MKLDNN only need 1 trainer"
;
}
if
(
testing
)
{
LOG
(
INFO
)
<<
"trainer: in testing mode"
;
if
(
config_
->
getOptConfig
().
use_sparse_remote_updater
()
||
...
...
paddle/trainer/tests/CMakeLists.txt
浏览文件 @
5ee63bb6
...
...
@@ -28,35 +28,7 @@ if(WITH_PYTHON)
${
PADDLE_SOURCE_DIR
}
/paddle/.set_port.sh -p port
${
CMAKE_CURRENT_BINARY_DIR
}
/test_TrainerOnePass
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
endif
()
################ test_CompareTwoNets ######################
add_unittest_without_exec
(
test_CompareTwoNets
test_CompareTwoNets.cpp
)
add_test
(
NAME test_CompareTwoNets
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python/
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareTwoNets
--config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
############### test_CompareTwoOpts ###################
add_unittest_without_exec
(
test_CompareTwoOpts
test_CompareTwoOpts.cpp
)
add_test
(
NAME test_CompareTwoOpts
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python/
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareTwoOpts
--config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
--num_passes=1 --need_high_accuracy=0
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
################# test_CompareSparse ##################
add_unittest_without_exec
(
test_CompareSparse
test_CompareSparse.cpp
)
if
(
NOT ON_TRAVIS
)
add_test
(
NAME test_CompareSparse
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/python/
./.set_port.sh -p port -n 6
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareSparse
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle/
)
endif
()
################# test_recurrent_machine_generation ###############
add_unittest_without_exec
(
test_recurrent_machine_generation
test_recurrent_machine_generation.cpp
)
...
...
paddle/trainer/tests/mnist.list
已删除
100644 → 0
浏览文件 @
571ef90c
trainer/tests/mnist_bin_part
paddle/trainer/tests/mnist_bin_part
已删除
100644 → 0
浏览文件 @
571ef90c
文件已删除
paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
已删除
100644 → 0
浏览文件 @
571ef90c
文件已删除
paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist
已删除
100644 → 0
浏览文件 @
571ef90c
./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
已删除
100644 → 0
浏览文件 @
571ef90c
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
# Note: when making change to this file, please make sure
# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std
(
0
.
1
)
default_device
(
0
)
word_dim
=
999
l1
=
0
l2
=
0
model_type
(
"nn"
)
sparse_update
=
get_config_arg
(
"sparse_update"
,
bool
,
False
)
TrainData
(
ProtoData
(
type
=
"proto_sequence"
,
files
= (
'trainer/tests/train_sparse.list'
),
))
Settings
(
algorithm
=
'sgd'
,
batch_size
=
100
,
learning_rate
=
0
.
0001
,
learning_rate_decay_a
=
4
e
-
08
,
learning_rate_decay_b
=
0
.
0
,
learning_rate_schedule
=
'poly'
,
)
wordvec_dim
=
32
layer2_dim
=
16
layer3_dim
=
16
hidden_dim
=
32
slot_names
= [
"qb"
,
"qw"
,
"tb"
,
"tw"
]
def
ltr_network
(
network_name
,
word_dim
=
word_dim
,
wordvec_dim
=
wordvec_dim
,
layer2_dim
=
layer2_dim
,
layer3_dim
=
layer3_dim
,
hidden_dim
=
hidden_dim
,
slot_names
=
slot_names
,
l1
=
l1
,
l2
=
l2
):
slotnum
=
len
(
slot_names
)
for
i
in
xrange
(
slotnum
):
Inputs
(
slot_names
[
i
] +
network_name
)
for
i
in
xrange
(
slotnum
):
Layer
(
name
=
slot_names
[
i
] +
network_name
,
type
=
"data"
,
size
=
word_dim
,
device
= -
1
,
)
Layer
(
name
=
slot_names
[
i
] +
"_embedding_"
+
network_name
,
type
=
"mixed"
,
size
=
wordvec_dim
,
bias
=
False
,
device
= -
1
,
inputs
=
TableProjection
(
slot_names
[
i
] +
network_name
,
parameter_name
=
"embedding.w0"
,
decay_rate_l1
=
l1
,
sparse_remote_update
=
True
,
sparse_update
=
sparse_update
,
),
)
Layer
(
name
=
slot_names
[
i
] +
"_rnn1_"
+
network_name
,
type
=
"recurrent"
,
active_type
=
"tanh"
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
"rnn1.bias"
),
inputs
=
Input
(
slot_names
[
i
] +
"_embedding_"
+
network_name
,
parameter_name
=
"rnn1.w0"
)
)
Layer
(
name
=
slot_names
[
i
] +
"_rnnlast_"
+
network_name
,
type
=
"seqlastins"
,
inputs
= [
slot_names
[
i
] +
"_rnn1_"
+
network_name
,
],
)
Layer
(
name
=
"layer2_"
+
network_name
,
type
=
"fc"
,
active_type
=
"tanh"
,
size
=
layer2_dim
,
bias
=
Bias
(
parameter_name
=
"layer2.bias"
),
inputs
= [
Input
(
slot_name
+
"_rnnlast_"
+
network_name
,
parameter_name
=
"_layer2_"
+
slot_name
+
".w"
,
decay_rate
=
l2
,
initial_smart
=
True
)
for
slot_name
in
slot_names
]
)
Layer
(
name
=
"layer3_"
+
network_name
,
type
=
"fc"
,
active_type
=
"tanh"
,
size
=
layer3_dim
,
bias
=
Bias
(
parameter_name
=
"layer3.bias"
),
inputs
= [
Input
(
"layer2_"
+
network_name
,
parameter_name
=
"_layer3.w"
,
decay_rate
=
l2
,
initial_smart
=
True
),
]
)
Layer
(
name
=
"output_"
+
network_name
,
type
=
"fc"
,
size
=
1
,
bias
=
False
,
inputs
= [
Input
(
"layer3_"
+
network_name
,
parameter_name
=
"_layerO.w"
),
],
)
ltr_network
(
"left"
)
ltr_network
(
"right"
)
Inputs
(
"label"
)
Layer
(
name
=
"label"
,
type
=
"data"
,
size
=
1
,
)
Outputs
(
"cost"
,
"qb_rnnlast_left"
)
Layer
(
name
=
"cost"
,
type
=
"rank-cost"
,
inputs
= [
"output_left"
,
"output_right"
,
"label"
],
)
paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
已删除
100644 → 0
浏览文件 @
571ef90c
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
# Note: when making change to this file, please make sure
# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std
(
0
.
1
)
default_device
(
0
)
word_dim
=
1451594
l1
=
0
l2
=
0
model_type
(
"nn"
)
sparse_update
=
get_config_arg
(
"sparse_update"
,
bool
,
False
)
TrainData
(
ProtoData
(
type
=
"proto_sequence"
,
files
= (
'trainer/tests/train.list'
),
))
Settings
(
algorithm
=
'sgd'
,
batch_size
=
100
,
learning_rate
=
0
.
0001
,
learning_rate_decay_a
=
4
e
-
08
,
learning_rate_decay_b
=
0
.
0
,
learning_rate_schedule
=
'poly'
,
)
wordvec_dim
=
128
layer2_dim
=
96
layer3_dim
=
96
hidden_dim
=
128
slot_names
= [
"qb"
,
"qw"
,
"tb"
,
"tw"
]
def
ltr_network
(
network_name
,
word_dim
=
word_dim
,
wordvec_dim
=
wordvec_dim
,
layer2_dim
=
layer2_dim
,
layer3_dim
=
layer3_dim
,
hidden_dim
=
hidden_dim
,
slot_names
=
slot_names
,
l1
=
l1
,
l2
=
l2
):
slotnum
=
len
(
slot_names
)
for
i
in
xrange
(
slotnum
):
Inputs
(
slot_names
[
i
] +
network_name
)
for
i
in
xrange
(
slotnum
):
Layer
(
name
=
slot_names
[
i
] +
network_name
,
type
=
"data"
,
size
=
word_dim
,
device
= -
1
,
)
Layer
(
name
=
slot_names
[
i
] +
"_embedding_"
+
network_name
,
type
=
"mixed"
,
size
=
wordvec_dim
,
bias
=
False
,
device
= -
1
,
inputs
=
TableProjection
(
slot_names
[
i
] +
network_name
,
parameter_name
=
"embedding.w0"
,
decay_rate_l1
=
l1
,
sparse_remote_update
=
True
,
sparse_update
=
sparse_update
,
),
)
Layer
(
name
=
slot_names
[
i
] +
"_rnn1_"
+
network_name
,
type
=
"recurrent"
,
active_type
=
"tanh"
,
bias
=
Bias
(
initial_std
=
0
,
parameter_name
=
"rnn1.bias"
),
inputs
=
Input
(
slot_names
[
i
] +
"_embedding_"
+
network_name
,
parameter_name
=
"rnn1.w0"
)
)
Layer
(
name
=
slot_names
[
i
] +
"_rnnlast_"
+
network_name
,
type
=
"seqlastins"
,
inputs
= [
slot_names
[
i
] +
"_rnn1_"
+
network_name
,
],
)
Layer
(
name
=
"layer2_"
+
network_name
,
type
=
"fc"
,
active_type
=
"tanh"
,
size
=
layer2_dim
,
bias
=
Bias
(
parameter_name
=
"layer2.bias"
),
inputs
= [
Input
(
slot_name
+
"_rnnlast_"
+
network_name
,
parameter_name
=
"_layer2_"
+
slot_name
+
".w"
,
decay_rate
=
l2
,
initial_smart
=
True
)
for
slot_name
in
slot_names
]
)
Layer
(
name
=
"layer3_"
+
network_name
,
type
=
"fc"
,
active_type
=
"tanh"
,
size
=
layer3_dim
,
bias
=
Bias
(
parameter_name
=
"layer3.bias"
),
inputs
= [
Input
(
"layer2_"
+
network_name
,
parameter_name
=
"_layer3.w"
,
decay_rate
=
l2
,
initial_smart
=
True
),
]
)
Layer
(
name
=
"output_"
+
network_name
,
type
=
"fc"
,
size
=
1
,
bias
=
False
,
inputs
= [
Input
(
"layer3_"
+
network_name
,
parameter_name
=
"_layerO.w"
),
],
)
ltr_network
(
"left"
)
ltr_network
(
"right"
)
Inputs
(
"label"
)
Layer
(
name
=
"label"
,
type
=
"data"
,
size
=
1
,
)
Outputs
(
"cost"
,
"qb_rnnlast_left"
)
Layer
(
name
=
"cost"
,
type
=
"rank-cost"
,
inputs
= [
"output_left"
,
"output_right"
,
"label"
],
)
paddle/trainer/tests/sample_trainer_config_rnn.conf
已删除
100644 → 0
浏览文件 @
571ef90c
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now, and will be updated later.
# Note: when making changes to this file, please make sure
# sample_trainer_config_qb_rnn.conf is changed accordingly so that the unit test
# for comparing these two nets can pass (test_CompareTwoNets).
# Global defaults handed to the config DSL before any layer is declared.
default_initial_std(0.1)
default_device(0)

# Size of the per-slot "data" layers created in ltr_network.
word_dim = 1451594
# Passed as decay_rate_l1 (embedding table) and decay_rate (fc inputs) below.
l1 = 0
l2 = 0

model_type("recurrent_nn")

# Read from the config arguments; defaults to False when not supplied.
sparse_update = get_config_arg("sparse_update", bool, False)

# NOTE: the parentheses around the path have no trailing comma, so `files`
# is a plain string, not a tuple.
TrainData(ProtoData(
    type="proto_sequence",
    files=('trainer/tests/train.list'), ))

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly', )

# Layer sizes used as default arguments of ltr_network.
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

# One data/embedding/recurrent pipeline is built per slot name.
slot_names = ["qb", "qw", "tb", "tw"]
def SimpleRecurrentLayer(name,
                         size,
                         active_type,
                         bias,
                         input_layer_name,
                         parameter_name,
                         seq_reversed=False):
    """Declare a recurrent layer as an explicit recurrent layer group.

    The group is named ``name + "_layer_group"``, takes ``input_layer_name``
    as its only in-link and exposes ``name`` as its only out-link. Inside the
    group a "mixed" layer called ``name`` combines an identity projection of
    the input with a full-matrix projection of a Memory that refers to the
    layer ``name`` itself; that projection uses ``parameter_name``.
    """
    group_name = name + "_layer_group"

    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)

    # The memory is declared before the layer it refers to.
    previous_state = Memory(name=name, size=size)
    projections = [
        IdentityProjection(input_layer_name),
        FullMatrixProjection(previous_state, parameter_name=parameter_name),
    ]
    Layer(
        name=name,
        type="mixed",
        size=size,
        active_type=active_type,
        bias=bias,
        inputs=projections)

    RecurrentLayerGroupEnd(group_name)
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the ranking network, suffixed by ``network_name``.

    For every slot name this declares an input, a data layer, an embedding
    ("mixed" layer with a table projection), a SimpleRecurrentLayer and a
    "seqlastins" layer. The per-slot results feed the fc layer
    ``layer2_<network_name>``, followed by ``layer3_<network_name>`` and the
    size-1 fc layer ``output_<network_name>``.

    Parameter names ("embedding.w0", "rnn1.w0", "_layer3.w", ...) do not
    include ``network_name``, so every branch declares the same names.
    """
    # All inputs are registered first, before any layer is declared.
    for slot in slot_names:
        Inputs(slot + network_name)

    for slot in slot_names:
        data_name = slot + network_name
        embedding_name = slot + "_embedding_" + network_name
        rnn_name = slot + "_rnn1_" + network_name
        last_name = slot + "_rnnlast_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)

        table = TableProjection(
            data_name,
            parameter_name="embedding.w0",
            decay_rate_l1=l1,
            sparse_remote_update=True,
            sparse_update=sparse_update)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=table)

        SimpleRecurrentLayer(
            name=rnn_name,
            size=hidden_dim,
            active_type="tanh",
            bias=Bias(
                initial_std=0, parameter_name="rnn1.bias"),
            input_layer_name=embedding_name,
            parameter_name="rnn1.w0")

        Layer(name=last_name, type="seqlastins", inputs=[rnn_name])

    layer2_name = "layer2_" + network_name
    layer3_name = "layer3_" + network_name

    # layer2 takes one input per slot, each with its own weight name.
    Layer(
        name=layer2_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])

    Layer(
        name=layer3_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                layer2_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True),
        ])

    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input(
            layer3_name, parameter_name="_layerO.w")])
# Declare the two branches; their layer names end in "left" / "right".
ltr_network("left")
ltr_network("right")

# The label is a size-1 data layer consumed by the cost layer below.
Inputs("label")
Layer(
    name="label",
    type="data",
    size=1, )

# Besides the cost, the "qb" slot's seqlastins layer of the left branch is
# exposed as an output.
Outputs("cost", "qb_rnnlast_left")
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"], )
paddle/trainer/tests/testPyDataWrapper.py
浏览文件 @
5ee63bb6
...
...
@@ -20,28 +20,6 @@ import random
import
json
import
string
@provider(slots=[
    SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1),
    IndexSlot(3)
])
def processNonSequenceData(obj, filename):
    """Yield one sample per line of ``filename``.

    Each line is split on ';' into five fields. Field 0 is parsed as an int
    index. Fields 1, 2 and 3 are whitespace-separated lists whose first token
    is skipped: ints, floats and "int:float" pairs respectively. Field 4 is
    stripped and everything after its first space is kept as a string.

    Every sample is ``[non_values, dense, values, strs, index]``.
    """

    def parse_pair(token):
        # "3:0.5" -> (3, 0.5)
        parts = token.split(":")
        return int(parts[0]), float(parts[1])

    with open(filename, "rb") as f:
        for line in f:
            fields = line.split(';')
            index = int(fields[0])
            non_values = [int(tok) for tok in fields[1].split()[1:]]
            dense = [float(tok) for tok in fields[2].split()[1:]]
            strs = fields[4].strip().split(' ', 1)[1]
            values = [parse_pair(tok) for tok in fields[3].split()[1:]]
            yield [non_values, dense, values, strs, index]
SPARSE_ID_LIMIT
=
1000
SPARSE_ID_COUNT
=
100
SEQUENCE_LIMIT
=
50
...
...
@@ -146,8 +124,6 @@ def processSubSeqAndGenerateData(obj, name):
if
__name__
==
"__main__"
:
pvd
=
processNonSequenceData
(
"test.txt"
)
print
pvd
.
getNextBatch
(
100
)
pvd
=
processSeqAndGenerateData
(
"_"
)
print
pvd
.
getNextBatch
(
100
)
pvd
=
processSubSeqAndGenerateData
(
"_"
)
...
...
paddle/trainer/tests/test_CompareTwoOpts.cpp
已删除
100644 → 0
浏览文件 @
571ef90c
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/utils/PythonUtil.h>
#include <algorithm>
#include <cstdlib>
#include "paddle/trainer/Trainer.h"
using
namespace
paddle
;
// NOLINT
using
namespace
std
;
// NOLINT
DECLARE_int32
(
gpu_id
);
DECLARE_bool
(
local
);
DECLARE_bool
(
use_gpu
);
DECLARE_string
(
config
);
DECLARE_string
(
nics
);
DEFINE_string
(
config_file_a
,
""
,
"config of one network to compare"
);
DEFINE_string
(
config_file_b
,
""
,
"config of another network to compare"
);
DEFINE_bool
(
need_high_accuracy
,
true
,
"whether need to run in double accuracy (recommended)"
);
DEFINE_double
(
max_diff_ratio
,
0.0
f
,
"max diff ratio allowed for outputs and parameters (value/gradient)"
);
// Data collected from one training run so that two runs can be compared.
struct ComData {
  // Network outputs compared by compareGradient. calcGradient in this file
  // does not assign this member.
  vector<Argument> outArgs;
  // Parameters of the trainer's gradient machine, assigned by calcGradient.
  vector<ParameterPtr> parameters;
};
// Trains the network described by `configFile` and stores the gradient
// machine's parameters in `data.parameters`.
//
// The run is forced to local, CPU-only mode and both random seeds are reset
// to 0 before the trainer is created.
void calcGradient(ComData& data, const string configFile) {
  FLAGS_config = configFile;

  FLAGS_local = true;
  FLAGS_use_gpu = false;

  FLAGS_nics = "";

  // Reset both seeds before the trainer is constructed.
  *ThreadLocalRand::getSeed() = 0;
  srand(0);

  Trainer trainer;
  trainer.init(TrainerConfigHelper::createFromFlagConfig(), false);

  // The parameter pointers are stored before training starts.
  data.parameters = trainer.getGradientMachine()->getParameters();
  trainer.getDataProvider()->setSkipShuffle();
  trainer.train();
}
// Compares buffers A and B element by element over `len` entries.
//
// An element counts as a mismatch when the two values differ and the
// difference divided by the larger magnitude exceeds FLAGS_max_diff_ratio.
// Each mismatch is logged with its row (index / width) and both values,
// labelled by desA / desB. A single expectation on the mismatch count is
// recorded at the end.
void checkBuffer(real* A,
                 const char* desA,
                 real* B,
                 const char* desB,
                 size_t len,
                 size_t width = 1) {
  int mismatchCount = 0;
  for (size_t idx = 0; idx < len; ++idx) {
    real diff = fabs(A[idx] - B[idx]);
    // Identical values never count, whatever the threshold is.
    if (!(diff > 0.0f)) {
      continue;
    }
    if (!(diff / std::max(fabs(A[idx]), fabs(B[idx])) >
          FLAGS_max_diff_ratio)) {
      continue;
    }
    ++mismatchCount;
    LOG(INFO) << "Row: " << idx / width << ", " << desA << " : " << A[idx]
              << " " << desB << " : " << B[idx];
  }
  EXPECT_EQ(0, mismatchCount);
  LOG(INFO) << "\n\n";
}
// Compares the results of two training runs.
//
// First the network outputs are compared pairwise, then for every parameter
// both its value buffer and its gradient buffer are compared with
// checkBuffer. Size mismatches between the two runs abort the comparison
// with a fatal assertion, because the element-wise checks index the
// B-side data with A-side bounds.
void compareGradient(ComData& comDataA, ComData& comDataB) {
  // References avoid copying the argument vectors.
  vector<Argument>& outArgsA = comDataA.outArgs;
  vector<Argument>& outArgsB = comDataB.outArgs;

  ASSERT_EQ(outArgsA.size(), outArgsB.size())
      << "the two networks have a different number of outputs";

  for (size_t i = 0; i < outArgsA.size(); ++i) {
    CpuMatrix matA(outArgsA[i].value->getHeight(),
                   outArgsA[i].value->getWidth());
    CpuMatrix matB(outArgsB[i].value->getHeight(),
                   outArgsB[i].value->getWidth());
    // checkBuffer reads matA.getElementCnt() entries from both buffers.
    ASSERT_EQ(matA.getElementCnt(), matB.getElementCnt())
        << "output " << i << " has a different size in the two networks";

    matA.copyFrom(*outArgsA[i].value);
    matB.copyFrom(*outArgsB[i].value);

    LOG(INFO) << "\n--------------------------------"
              << " Check Network Output_" << i << ":"
              << " -------------------------------------\n";
    checkBuffer(matA.getData(),
                "network A output",
                matB.getData(),
                "network B output",
                matA.getElementCnt(),
                matA.getWidth());
  }

  vector<ParameterPtr>& parametersA = comDataA.parameters;
  vector<ParameterPtr>& parametersB = comDataB.parameters;

  ASSERT_EQ(parametersA.size(), parametersB.size())
      << "the two networks have a different number of parameters";

  LOG(INFO) << "\n\n--------------------------------"
            << " Check Gradient Machine Parameters:"
            << " -------------------------------------\n";
  for (size_t i = 0; i < parametersA.size(); ++i) {
    ParameterPtr parameterA, parameterB;
    parameterA = parametersA[i];
    parameterB = parametersB[i];

    CpuVector paraA(parameterA->getSize());
    CpuVector paraB(parameterB->getSize());
    // checkBuffer reads paraA.getSize() entries from both buffers.
    ASSERT_EQ(paraA.getSize(), paraB.getSize())
        << "parameter " << parameterA->getName()
        << " has a different size in the two networks";

    paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE));
    paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE));

    LOG(INFO) << "\n\n----------- PARAMETER_VALUE: " << parameterA->getName()
              << " ; size : " << paraA.getSize() << " ------------";
    checkBuffer(paraA.getData(),
                "Network A",
                paraB.getData(),
                "Network B",
                paraA.getSize());

    CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT));
    CpuVector gradB(*parameterB->getBuf(PARAMETER_GRADIENT));
    ASSERT_EQ(gradA.getSize(), gradB.getSize())
        << "gradient of " << parameterA->getName()
        << " has a different size in the two networks";

    LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: "
              << parameterA->getName() << " ; size : " << gradA.getSize()
              << " -----------";
    checkBuffer(gradA.getData(),
                "Network A",
                gradB.getData(),
                "Network B",
                gradA.getSize());
  }
}
// Trains the networks given by --config_file_a and --config_file_b one after
// the other and compares the collected results.
TEST(Trainer, create) {
  ComData dataA;
  calcGradient(dataA, FLAGS_config_file_a);
  LOG(INFO) << "\n\ntraining of Network A is finished\n\n";

  ComData dataB;
  calcGradient(dataB, FLAGS_config_file_b);
  LOG(INFO) << "\n\ntraining of the Network B is finished\n\n";

  compareGradient(dataA, dataB);
}
// Test entry point: initializes Paddle, GoogleTest and Python, chooses a
// default for --max_diff_ratio when it was left at 0.0, then runs all tests.
int main(int argc, char** argv) {
  paddle::initMain(argc, argv);
  testing::InitGoogleTest(&argc, argv);
  initPython(argc, argv);

#ifndef PADDLE_TYPE_DOUBLE
  // Without PADDLE_TYPE_DOUBLE the test is skipped (reported as success)
  // whenever high accuracy is requested, which is the flag's default.
  if (FLAGS_need_high_accuracy) {
    LOG(INFO) << "skip test due to it's need high accuracy";
    return 0;
  }
  // 0.0 is the flag's default value and is replaced by a built-in tolerance.
  if (FLAGS_max_diff_ratio == 0.0f) {
    FLAGS_max_diff_ratio = 2e-4;
    LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
              << " in low accuracy mode";
  }
#else
  // With PADDLE_TYPE_DOUBLE a much tighter default tolerance is used.
  if (FLAGS_max_diff_ratio == 0.0f) {
    FLAGS_max_diff_ratio = 2e-7;
    LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
              << " in high accuracy mode";
  }
#endif

  int ret = RUN_ALL_TESTS();
  return ret;
}
paddle/trainer/tests/test_PyDataProviderWrapper.cpp
浏览文件 @
5ee63bb6
...
...
@@ -25,45 +25,9 @@ limitations under the License. */
#include <unordered_set>
#include "picojson.h"
void
checkEqual
(
const
paddle
::
Argument
&
expect
,
const
paddle
::
Argument
&
actual
);
void
checkValue
(
std
::
vector
<
paddle
::
Argument
>&
arguments
,
picojson
::
array
&
arr
);
const
std
::
string
kDir
=
"./trainer/tests/pydata_provider_wrapper_dir/"
;
// Loads one batch through the "py" data provider (module testPyDataWrapper,
// object processNonSequenceData) and one batch through the "proto" data
// provider, then checks that both batches contain equal arguments.
TEST(PyDataProviderWrapper, NoSequenceData) {
  paddle::DataConfig conf;
  conf.set_type("py");
  conf.set_load_data_module(std::string("testPyDataWrapper"));
  conf.set_load_data_object(std::string("processNonSequenceData"));
  conf.set_async_load_data(false);
  conf.clear_files();
  conf.set_files(kDir + "test_pydata_provider_wrapper.list");
  paddle::DataProviderPtr provider(paddle::DataProvider::create(conf, false));
  provider->setSkipShuffle();
  provider->reset();
  paddle::DataBatch batchFromPy;
  provider->getNextBatch(100, &batchFromPy);

  paddle::DataConfig conf2;
  conf2.set_type("proto");
  conf2.set_async_load_data(false);
  conf2.clear_files();
  conf2.set_files(kDir + "test_pydata_provider_wrapper.protolist");

  provider.reset(paddle::DataProvider::create(conf2, false));
  provider->setSkipShuffle();
  provider->reset();
  paddle::DataBatch batchFromProto;
  provider->getNextBatch(100, &batchFromProto);

  std::vector<paddle::Argument>& pyArguments = batchFromPy.getStreams();
  std::vector<paddle::Argument>& protoArguments = batchFromProto.getStreams();

  // Fatal on mismatch: the loop below indexes protoArguments with the
  // bound of pyArguments.
  ASSERT_EQ(pyArguments.size(), protoArguments.size());

  for (size_t i = 0; i < pyArguments.size(); ++i) {
    checkEqual(protoArguments[i], pyArguments[i]);
  }
}
TEST
(
PyDataProviderWrapper
,
SequenceData
)
{
paddle
::
DataConfig
conf
;
conf
.
set_type
(
"py"
);
...
...
@@ -148,66 +112,6 @@ int main(int argc, char** argv) {
return
RUN_ALL_TESTS
();
}
// Checks that `actual` carries the same data as `expect`.
//
// Only the members that are set on `expect` are compared: `value` (as a
// CpuSparseMatrix or a CpuMatrix), `ids` and `strs`. Checks that guard a
// following dereference or an indexed read are fatal (ASSERT_*), so a
// missing member or a size mismatch stops the comparison instead of reading
// through a null pointer or past the end of a buffer. Element comparisons
// stay non-fatal so that all differing elements are reported.
void checkEqual(const paddle::Argument& expect,
                const paddle::Argument& actual) {
  if (expect.value) {
    ASSERT_TRUE(actual.value != nullptr);
    paddle::Matrix* e = expect.value.get();
    paddle::Matrix* a = actual.value.get();
    ASSERT_EQ(e->getWidth(), a->getWidth());
    ASSERT_EQ(e->getHeight(), a->getHeight());

    if (paddle::CpuSparseMatrix* se =
            dynamic_cast<paddle::CpuSparseMatrix*>(e)) {
      paddle::CpuSparseMatrix* sa = dynamic_cast<paddle::CpuSparseMatrix*>(a);
      // The actual matrix must be sparse as well before it is dereferenced.
      ASSERT_TRUE(sa != nullptr);
      // The lengths of the row/column arrays below depend on the format and
      // on the element count, so both must agree.
      ASSERT_EQ(se->getFormat(), sa->getFormat());
      ASSERT_EQ(se->getElementCnt(), sa->getElementCnt());

      size_t rowSize = se->getFormat() == paddle::SPARSE_CSC
                           ? se->getElementCnt()
                           : se->getHeight() + 1;
      size_t colSize = se->getFormat() == paddle::SPARSE_CSC
                           ? se->getWidth() + 1
                           : se->getElementCnt();
      for (size_t i = 0; i < rowSize; ++i) {
        EXPECT_EQ(se->getRows()[i], sa->getRows()[i]);
      }
      for (size_t i = 0; i < colSize; ++i) {
        EXPECT_EQ(se->getCols()[i], sa->getCols()[i]);
      }
      if (se->getValueType() == paddle::FLOAT_VALUE) {
        ASSERT_EQ(paddle::FLOAT_VALUE, sa->getValueType());
        for (size_t i = 0; i < se->getElementCnt(); ++i) {
          EXPECT_EQ(se->getValue()[i], sa->getValue()[i]);
        }
      }
    } else if (dynamic_cast<paddle::CpuMatrix*>(e)) {
      ASSERT_EQ(e->getElementCnt(), a->getElementCnt());
      for (size_t i = 0; i < e->getElementCnt(); ++i) {
        EXPECT_EQ(e->getData()[i], a->getData()[i]);
      }
    }
  }

  if (expect.ids) {
    ASSERT_TRUE(actual.ids != nullptr);
    paddle::VectorT<int>* e = expect.ids.get();
    paddle::VectorT<int>* a = actual.ids.get();
    ASSERT_EQ(e->getSize(), a->getSize());
    for (size_t i = 0; i < e->getSize(); ++i) {
      EXPECT_EQ(e->getData()[i], a->getData()[i]);
    }
  }

  if (expect.strs) {
    ASSERT_TRUE(actual.strs != nullptr);
    std::vector<std::string>* e = expect.strs.get();
    std::vector<std::string>* a = actual.strs.get();
    ASSERT_EQ(e->size(), a->size());
    for (size_t i = 0; i < e->size(); ++i) {
      EXPECT_EQ((*e)[i], (*a)[i]);
    }
  }
}
void
checkValue
(
std
::
vector
<
paddle
::
Argument
>&
arguments
,
picojson
::
array
&
arr
)
{
// CHECK SLOT 0, Sparse Value.
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
5ee63bb6
...
...
@@ -1826,7 +1826,7 @@ class FCLayer(LayerBase):
self
.
layer_type
=
'mkldnn_fc'
config_assert
(
len
(
inputs
)
==
1
,
"M
kldnn
FCLayer support one and only one input!"
)
"M
KLDNN
FCLayer support one and only one input!"
)
super
(
FCLayer
,
self
).
__init__
(
name
,
self
.
layer_type
,
size
,
inputs
=
inputs
,
**
xargs
)
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
...
...
@@ -1837,7 +1837,7 @@ class FCLayer(LayerBase):
sparse
=
format
==
"csr"
or
format
==
"csc"
if
use_mkldnn
:
config_assert
(
not
sparse
,
"M
kldnn
FCLayer do not support sparse format yet"
)
"M
KLDNN
FCLayer do not support sparse format yet"
)
if
use_mkldnn_wgt
:
dims
=
[
self
.
config
.
size
,
input_layer
.
size
]
if
sparse
:
...
...
@@ -1853,7 +1853,7 @@ class FCLayer(LayerBase):
@
config_layer
(
'mkldnn_fc'
)
class
M
kldnn
FcLayer
(
FCLayer
):
class
M
KLDNN
FcLayer
(
FCLayer
):
layer_type
=
'mkldnn_fc'
...
...
@@ -3209,6 +3209,18 @@ class SubNestedSequenceLayer(LayerBase):
self
.
set_layer_size
(
size
)
@config_layer('dot_prod')
class DotProdLayer(LayerBase):
    """Config-side definition of the 'dot_prod' layer.

    The layer takes exactly two inputs of equal size and has size 1.
    """

    def __init__(self, name, inputs, device=None):
        # The base class is initialized with size 0; the real size is set
        # once the inputs have been validated.
        super(DotProdLayer, self).__init__(
            name, 'dot_prod', 0, inputs, device=device)

        input_count = len(inputs)
        config_assert(input_count == 2, 'DotProdLayer must have 2 inputs.')

        first_size = self.get_input_layer(0).size
        second_size = self.get_input_layer(1).size
        config_assert(first_size == second_size,
                      "Two inputs should have the same size.")

        self.set_layer_size(1)
@
config_layer
(
'out_prod'
)
class
OuterProdLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
device
=
None
):
...
...
@@ -3506,11 +3518,17 @@ def ExpressionLayer(name, inputs, **xargs):
@
config_layer
(
'concat'
)
class
ConcatenateLayer
(
LayerBase
):
layer_type
=
'concat'
def
__init__
(
self
,
name
,
inputs
,
bias
=
False
,
**
xargs
):
config_assert
(
inputs
,
'inputs cannot be empty'
)
config_assert
(
not
bias
,
'ConcatenateLayer cannot support bias.'
)
use_mkldnn
=
bool
(
int
(
g_command_config_args
.
get
(
"use_mkldnn"
,
0
)))
if
self
.
layer_type
==
"mkldnn_concat"
:
config_assert
(
use_mkldnn
,
"mkldnn_concat only support MKLDNN"
)
self
.
layer_type
=
'mkldnn_concat'
if
use_mkldnn
else
'concat'
super
(
ConcatenateLayer
,
self
).
__init__
(
name
,
'concat'
,
0
,
inputs
=
inputs
,
**
xargs
)
name
,
self
.
layer_type
,
0
,
inputs
=
inputs
,
**
xargs
)
size
=
0
for
input_index
in
xrange
(
len
(
self
.
inputs
)):
assert
self
.
get_input_layer
(
0
).
height
==
self
.
get_input_layer
(
...
...
@@ -3530,6 +3548,11 @@ class ConcatenateLayer(LayerBase):
self
.
set_layer_size
(
size
)
# Registers ConcatenateLayer under the 'mkldnn_concat' layer type. Only the
# class-level layer_type differs from the base class.
@config_layer('mkldnn_concat')
class MKLDNNConcatLayer(ConcatenateLayer):
    layer_type = 'mkldnn_concat'
# like concat layer, but each input layer was processed by a Projection.
@
config_layer
(
'concat2'
)
class
ConcatenateLayer2
(
LayerBase
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
5ee63bb6
...
...
@@ -115,6 +115,7 @@ __all__ = [
'huber_classification_cost'
,
'block_expand_layer'
,
'maxout_layer'
,
'dot_prod_layer'
,
'out_prod_layer'
,
'printer_layer'
,
'print_layer'
,
...
...
@@ -198,6 +199,7 @@ class LayerType(object):
SCALING_LAYER
=
'scaling'
TRANS_LAYER
=
'trans'
ROTATE_LAYER
=
'rotate'
DOT_PROD_LAYER
=
'dot_prod'
OUT_PROD_LAYER
=
'out_prod'
FEATURE_MAP_EXPAND_LAYER
=
'featmap_expand'
...
...
@@ -4143,6 +4145,45 @@ def maxid_layer(input, name=None, layer_attr=None):
size
=
l
.
config
.
size
)
@wrap_name_default()
def dot_prod_layer(input1, input2, name=None, layer_attr=None):
    """
    A layer for computing the dot product of two vectors.

    The example usage is:

    .. code-block:: python

        dot_prod = dot_prod_layer(input1=vec1, input2=vec2)

    :param name: The name of this layer. It is optional.
    :type name: basestring
    :param input1: The first input layer.
    :type input1: LayerOutput
    :param input2: The second input layer.
    :type input2: LayerOutput
    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
                       details.
    :type layer_attr: ExtraLayerAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input1, LayerOutput)
    assert isinstance(input2, LayerOutput)
    assert input1.size == input2.size, ("Two inputs should have the same size.")

    l = Layer(
        name=name,
        type=LayerType.DOT_PROD_LAYER,
        inputs=[input1.name, input2.name],
        **ExtraLayerAttribute.to_kwargs(layer_attr))
    # The reported size is read back from the created layer's config.
    return LayerOutput(
        name=name,
        layer_type=LayerType.DOT_PROD_LAYER,
        parents=[input1, input2],
        size=l.config.size)
@
wrap_name_default
()
def
out_prod_layer
(
input1
,
input2
,
name
=
None
,
layer_attr
=
None
):
"""
...
...
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
浏览文件 @
5ee63bb6
...
...
@@ -11,6 +11,6 @@ test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_l
test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer test_scale_sub_region_layer
test_factorization_machine
)
test_
dot_prod_layer test_
factorization_machine
)
export
whole_configs
=(
test_split_datasource
)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr
0 → 100644
浏览文件 @
5ee63bb6
type: "nn"
layers {
name: "vector1"
type: "data"
size: 10
active_type: ""
}
layers {
name: "vector2"
type: "data"
size: 10
active_type: ""
}
layers {
name: "__dot_prod_layer_0__"
type: "dot_prod"
size: 1
active_type: ""
inputs {
input_layer_name: "vector1"
}
inputs {
input_layer_name: "vector2"
}
}
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
sub_models {
name: "root"
layer_names: "vector1"
layer_names: "vector2"
layer_names: "__dot_prod_layer_0__"
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
is_recurrent_layer_group: false
}
python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
0 → 100644
浏览文件 @
5ee63bb6
# Test config for dot_prod_layer: two data layers of size 10 are fed into a
# dot product layer, which is declared as the network output.
from paddle.trainer_config_helpers import *

vec1 = data_layer(name='vector1', size=10)
vec2 = data_layer(name='vector2', size=10)

dot_product = dot_prod_layer(input1=vec1, input2=vec2)

outputs(dot_product)
python/paddle/v2/fluid/framework.py
浏览文件 @
5ee63bb6
...
...
@@ -4,7 +4,10 @@ import collections
import
numpy
as
np
import
copy
__all__
=
[
'Block'
,
'Variable'
,
'Program'
,
'Operator'
,
'default_startup_program'
,
'default_main_program'
]
__all__
=
[
'Block'
,
'Variable'
,
'Program'
,
'Operator'
,
'default_startup_program'
,
'default_main_program'
]
def
unique_name
(
prefix
):
...
...
@@ -232,17 +235,17 @@ class Operator(object):
in_proto
.
name
)
if
found
:
in_arg
u
s
=
inputs
[
in_proto
.
name
]
if
not
isinstance
(
in_arg
u
s
,
list
):
in_arg
us
=
[
in_argu
s
]
if
not
in_proto
.
duplicable
and
len
(
in_arg
u
s
)
>
1
:
in_args
=
inputs
[
in_proto
.
name
]
if
not
isinstance
(
in_args
,
list
):
in_arg
s
=
[
in_arg
s
]
if
not
in_proto
.
duplicable
and
len
(
in_args
)
>
1
:
raise
ValueError
(
"Input %s expects only one input, but %d are given."
%
(
in_proto
.
name
,
len
(
in_arg
u
s
)))
in_arg
u
_names
=
[]
for
arg
u
in
in_argu
s
:
in_arg
u_names
.
append
(
argu
.
name
)
self
.
desc
.
set_input
(
in_proto
.
name
,
in_arg
u
_names
)
%
(
in_proto
.
name
,
len
(
in_args
)))
in_arg_names
=
[]
for
arg
in
in_arg
s
:
in_arg
_names
.
append
(
arg
.
name
)
self
.
desc
.
set_input
(
in_proto
.
name
,
in_arg_names
)
else
:
self
.
desc
.
set_input
(
in_proto
.
name
,
[])
...
...
@@ -260,18 +263,18 @@ class Operator(object):
str
(
e
)
for
e
in
given
)))
for
out_proto
in
proto
.
outputs
:
out_arg
u
s
=
outputs
[
out_proto
.
name
]
if
not
isinstance
(
out_arg
u
s
,
list
):
out_arg
us
=
[
out_argu
s
]
if
not
out_proto
.
duplicable
and
len
(
out_arg
u
s
)
>
1
:
out_args
=
outputs
[
out_proto
.
name
]
if
not
isinstance
(
out_args
,
list
):
out_arg
s
=
[
out_arg
s
]
if
not
out_proto
.
duplicable
and
len
(
out_args
)
>
1
:
raise
ValueError
(
"Output %s expects only one output, but %d are given."
%
(
out_proto
.
name
,
len
(
out_arg
u
s
)))
out_arg
u
_names
=
[]
for
arg
u
in
out_argu
s
:
out_arg
u_names
.
append
(
argu
.
name
)
arg
u
.
op
=
self
self
.
desc
.
set_output
(
out_proto
.
name
,
out_arg
u
_names
)
(
out_proto
.
name
,
len
(
out_args
)))
out_arg_names
=
[]
for
arg
in
out_arg
s
:
out_arg
_names
.
append
(
arg
.
name
)
arg
.
op
=
self
self
.
desc
.
set_output
(
out_proto
.
name
,
out_arg_names
)
if
attrs
is
not
None
:
if
not
isinstance
(
attrs
,
dict
):
...
...
@@ -582,8 +585,10 @@ class Parameter(Variable):
g_main_program
=
Program
()
g_startup_program
=
Program
()
def default_startup_program():
    """Return the module-level ``g_startup_program`` object."""
    return g_startup_program
def default_main_program():
    """Return the module-level ``g_main_program`` object."""
    return g_main_program
python/paddle/v2/fluid/tests/book/test_fit_a_line.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistable
s
import
paddle.v2.fluid.layers
as
layer
s
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.io
import
save_persistables
,
load_persistables
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
import
numpy
as
np
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
data_type
=
'float32'
)
y_predict
=
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
y_predict
=
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'float32'
)
cost
=
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
cost
=
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
20
...
...
python/paddle/v2/fluid/tests/book/test_image_classification_train.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.optimizer
as
optimizer
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.framework
as
framework
from
paddle.v2.fluid.initializer
import
XavierInitializer
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
resnet_cifar10
(
input
,
depth
=
32
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
):
tmp
=
layers
.
conv2d
(
input
=
input
,
filter_size
=
filter_size
,
...
...
@@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32):
padding
=
padding
,
act
=
None
,
bias_attr
=
False
)
return
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
return
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
def
shortcut
(
input
,
ch_in
,
ch_out
,
stride
,
program
,
init_program
):
if
ch_in
!=
ch_out
:
...
...
@@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32):
else
:
return
input
def
basicblock
(
input
,
ch_in
,
ch_out
,
stride
):
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
def
basicblock
(
input
,
ch_in
,
ch_out
,
stride
):
tmp
=
conv_bn_layer
(
input
,
ch_out
,
3
,
stride
,
1
)
tmp
=
conv_bn_layer
(
tmp
,
ch_out
,
3
,
1
,
1
,
act
=
None
)
short
=
shortcut
(
input
,
ch_in
,
ch_out
,
stride
)
return
layers
.
elementwise_add
(
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
return
layers
.
elementwise_add
(
x
=
tmp
,
y
=
short
,
act
=
'relu'
)
def
layer_warp
(
block_func
,
input
,
ch_in
,
ch_out
,
count
,
stride
):
tmp
=
block_func
(
input
,
ch_in
,
ch_out
,
stride
)
...
...
@@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32):
assert
(
depth
-
2
)
%
6
==
0
n
=
(
depth
-
2
)
/
6
conv1
=
conv_bn_layer
(
input
=
input
,
ch_out
=
16
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
)
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
input
=
input
,
ch_out
=
16
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
)
res1
=
layer_warp
(
basicblock
,
conv1
,
16
,
16
,
n
,
1
)
res2
=
layer_warp
(
basicblock
,
res1
,
16
,
32
,
n
,
2
)
res3
=
layer_warp
(
basicblock
,
res2
,
32
,
64
,
n
,
2
)
pool
=
layers
.
pool2d
(
input
=
res3
,
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
input
=
res3
,
pool_size
=
8
,
pool_type
=
'avg'
,
pool_stride
=
1
)
return
pool
def
vgg16_bn_drop
(
input
):
def
conv_block
(
input
,
num_filter
,
groups
,
dropouts
):
def
conv_block
(
input
,
num_filter
,
groups
,
dropouts
):
return
nets
.
img_conv_group
(
input
=
input
,
pool_size
=
2
,
...
...
@@ -123,22 +71,14 @@ def vgg16_bn_drop(input):
conv4
=
conv_block
(
conv3
,
512
,
3
,
[
0.4
,
0.4
,
0
])
conv5
=
conv_block
(
conv4
,
512
,
3
,
[
0.4
,
0.4
,
0
])
drop
=
layers
.
dropout
(
x
=
conv5
,
dropout_prob
=
0.5
)
drop
=
layers
.
dropout
(
x
=
conv5
,
dropout_prob
=
0.5
)
fc1
=
layers
.
fc
(
input
=
drop
,
size
=
512
,
act
=
None
,
param_attr
=
{
"initializer"
:
XavierInitializer
()})
reshape1
=
layers
.
reshape
(
x
=
fc1
,
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
reshape1
=
layers
.
reshape
(
x
=
fc1
,
shape
=
list
(
fc1
.
shape
+
(
1
,
1
)))
bn
=
layers
.
batch_norm
(
input
=
reshape1
,
act
=
'relu'
)
drop2
=
layers
.
dropout
(
x
=
bn
,
dropout_prob
=
0.5
)
fc2
=
layers
.
fc
(
input
=
drop2
,
size
=
512
,
act
=
None
,
...
...
@@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
# optimizer =
optimizer.
SGDOptimizer(learning_rate=0.001)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.001
)
# optimizer = SGDOptimizer(learning_rate=0.001)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.001
)
opts
=
optimizer
.
minimize
(
avg_cost
)
BATCH_SIZE
=
128
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.evaluator
as
evaluator
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
import
numpy
as
np
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
data_type
=
'int64'
)
conv_pool_1
=
nets
.
simple_img_conv_pool
(
input
=
images
,
filter_size
=
5
,
...
...
@@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool(
pool_stride
=
2
,
act
=
"relu"
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
)
predict
=
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
optimizer
=
AdamOptimizer
(
learning_rate
=
0.01
,
beta1
=
0.9
,
beta2
=
0.999
)
opts
=
optimizer
.
minimize
(
avg_cost
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
accuracy
,
acc_out
=
evaluator
.
accuracy
(
input
=
predict
,
label
=
label
)
BATCH_SIZE
=
50
PASS_NUM
=
3
...
...
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
from
paddle.v2.fluid.initializer
import
UniformInitializer
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
MomentumOptimizer
from
paddle.v2.fluid.regularizer
import
L2DecayRegularizer
BATCH_SIZE
=
128
image
=
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
image
=
layers
.
data
(
name
=
'x'
,
shape
=
[
784
],
data_type
=
'float32'
)
param_attr
=
{
'name'
:
None
,
...
...
@@ -22,32 +18,21 @@ param_attr = {
'regularization'
:
L2DecayRegularizer
(
0.0005
*
BATCH_SIZE
)
}
hidden1
=
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden1
=
layers
.
fc
(
input
=
image
,
size
=
128
,
act
=
'relu'
,
param_attr
=
param_attr
)
hidden2
=
layers
.
fc
(
input
=
hidden1
,
size
=
64
,
act
=
'relu'
,
param_attr
=
param_attr
)
predict
=
layers
.
fc
(
input
=
hidden2
,
size
=
10
,
act
=
'softmax'
,
param_attr
=
param_attr
)
label
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
label
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
data_type
=
'int64'
)
cost
=
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
accuracy
=
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
optimizer
=
optimizer
.
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
optimizer
=
MomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
opts
=
optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/book/test_recommender_system.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
IS_SPARSE
=
True
USE_GPU
=
False
...
...
@@ -19,10 +18,7 @@ def get_usr_combined_features():
USR_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_user_id
()
+
1
uid
=
layers
.
data
(
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
uid
=
layers
.
data
(
name
=
'user_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_emb
=
layers
.
embedding
(
input
=
uid
,
...
...
@@ -31,15 +27,11 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'user_table'
},
is_sparse
=
IS_SPARSE
)
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
size
=
32
)
usr_fc
=
layers
.
fc
(
input
=
usr_emb
,
size
=
32
)
USR_GENDER_DICT_SIZE
=
2
usr_gender_id
=
layers
.
data
(
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_gender_id
=
layers
.
data
(
name
=
'gender_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
usr_gender_emb
=
layers
.
embedding
(
input
=
usr_gender_id
,
...
...
@@ -47,14 +39,10 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'gender_table'
},
is_sparse
=
IS_SPARSE
)
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
size
=
16
)
usr_gender_fc
=
layers
.
fc
(
input
=
usr_gender_emb
,
size
=
16
)
USR_AGE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
age_table
)
usr_age_id
=
layers
.
data
(
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_age_id
=
layers
.
data
(
name
=
'age_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_age_emb
=
layers
.
embedding
(
input
=
usr_age_id
,
...
...
@@ -62,14 +50,10 @@ def get_usr_combined_features():
is_sparse
=
IS_SPARSE
,
param_attr
=
{
'name'
:
'age_table'
})
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
size
=
16
)
usr_age_fc
=
layers
.
fc
(
input
=
usr_age_emb
,
size
=
16
)
USR_JOB_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_job_id
()
+
1
usr_job_id
=
layers
.
data
(
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_job_id
=
layers
.
data
(
name
=
'job_id'
,
shape
=
[
1
],
data_type
=
"int64"
)
usr_job_emb
=
layers
.
embedding
(
input
=
usr_job_id
,
...
...
@@ -77,16 +61,12 @@ def get_usr_combined_features():
param_attr
=
{
'name'
:
'job_table'
},
is_sparse
=
IS_SPARSE
)
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
size
=
16
)
usr_job_fc
=
layers
.
fc
(
input
=
usr_job_emb
,
size
=
16
)
concat_embed
=
layers
.
concat
(
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
axis
=
1
)
input
=
[
usr_fc
,
usr_gender_fc
,
usr_age_fc
,
usr_job_fc
],
axis
=
1
)
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
usr_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
return
usr_combined_features
...
...
@@ -95,10 +75,7 @@ def get_mov_combined_features():
MOV_DICT_SIZE
=
paddle
.
dataset
.
movielens
.
max_movie_id
()
+
1
mov_id
=
layers
.
data
(
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_id
=
layers
.
data
(
name
=
'movie_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_emb
=
layers
.
embedding
(
input
=
mov_id
,
...
...
@@ -107,36 +84,24 @@ def get_mov_combined_features():
param_attr
=
{
'name'
:
'movie_table'
},
is_sparse
=
IS_SPARSE
)
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
size
=
32
)
mov_fc
=
layers
.
fc
(
input
=
mov_emb
,
size
=
32
)
CATEGORY_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
movie_categories
())
category_id
=
layers
.
data
(
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
category_id
=
layers
.
data
(
name
=
'category_id'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_categories_emb
=
layers
.
embedding
(
input
=
category_id
,
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
input
=
category_id
,
size
=
[
CATEGORY_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_categories_hidden
=
layers
.
sequence_pool
(
input
=
mov_categories_emb
,
pool_type
=
"sum"
)
input
=
mov_categories_emb
,
pool_type
=
"sum"
)
MOV_TITLE_DICT_SIZE
=
len
(
paddle
.
dataset
.
movielens
.
get_movie_title_dict
())
mov_title_id
=
layers
.
data
(
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_title_id
=
layers
.
data
(
name
=
'movie_title'
,
shape
=
[
1
],
data_type
=
'int64'
)
mov_title_emb
=
layers
.
embedding
(
input
=
mov_title_id
,
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
input
=
mov_title_id
,
size
=
[
MOV_TITLE_DICT_SIZE
,
32
],
is_sparse
=
IS_SPARSE
)
mov_title_conv
=
nets
.
sequence_conv_pool
(
input
=
mov_title_emb
,
...
...
@@ -146,13 +111,10 @@ def get_mov_combined_features():
pool_type
=
"sum"
)
concat_embed
=
layers
.
concat
(
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
axis
=
1
)
input
=
[
mov_fc
,
mov_categories_hidden
,
mov_title_conv
],
axis
=
1
)
# FIXME(dzh) : need tanh operator
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
mov_combined_features
=
layers
.
fc
(
input
=
concat_embed
,
size
=
200
,
act
=
"tanh"
)
return
mov_combined_features
...
...
@@ -162,18 +124,11 @@ def model():
mov_combined_features
=
get_mov_combined_features
()
# need cos sim
inference
=
layers
.
cos_sim
(
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
inference
=
layers
.
cos_sim
(
X
=
usr_combined_features
,
Y
=
mov_combined_features
)
label
=
layers
.
data
(
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
label
=
layers
.
data
(
name
=
'score'
,
shape
=
[
1
],
data_type
=
'float32'
)
square_cost
=
layers
.
square_error_cost
(
input
=
inference
,
label
=
label
)
square_cost
=
layers
.
square_error_cost
(
input
=
inference
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
square_cost
)
...
...
@@ -182,7 +137,7 @@ def model():
def
main
():
cost
=
model
()
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.2
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.2
)
opts
=
sgd_optimizer
.
minimize
(
cost
)
if
USE_GPU
:
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
convolution_net
(
input_dim
,
class_dim
=
2
,
emb_dim
=
32
,
hid_dim
=
32
):
...
...
@@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
act
=
"softmax"
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
stacked_lstm_net
(
input_dim
,
...
...
@@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
return
avg_cost
,
acc
...
...
python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
AdamOptimizer
def
lstm_net
(
dict_dim
,
class_dim
=
2
,
emb_dim
=
32
,
seq_len
=
80
,
batch_size
=
50
):
...
...
@@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
cost
=
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
adam_optimizer
=
optimizer
.
AdamOptimizer
(
learning_rate
=
0.002
)
adam_optimizer
=
AdamOptimizer
(
learning_rate
=
0.002
)
opts
=
adam_optimizer
.
minimize
(
avg_cost
)
acc
=
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
python/paddle/v2/fluid/tests/book/test_word2vec.py
浏览文件 @
5ee63bb6
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.optimizer
as
optimizer
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
import
numpy
as
np
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
PASS_NUM
=
100
EMBED_SIZE
=
32
...
...
@@ -17,26 +16,11 @@ IS_SPARSE = True
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
dict_size
=
len
(
word_dict
)
first_word
=
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
data_type
=
'int64'
)
second_word
=
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
first_word
=
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
data_type
=
'int64'
)
second_word
=
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
data_type
=
'int64'
)
third_word
=
layers
.
data
(
name
=
'thirdw'
,
shape
=
[
1
],
data_type
=
'int64'
)
forth_word
=
layers
.
data
(
name
=
'forthw'
,
shape
=
[
1
],
data_type
=
'int64'
)
next_word
=
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
data_type
=
'int64'
)
embed_first
=
layers
.
embedding
(
input
=
first_word
,
...
...
@@ -64,19 +48,12 @@ embed_forth = layers.embedding(
param_attr
=
{
'name'
:
'shared_w'
})
concat_embed
=
layers
.
concat
(
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
axis
=
1
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_forth
],
axis
=
1
)
hidden1
=
layers
.
fc
(
input
=
concat_embed
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
predict_word
=
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
cost
=
layers
.
cross_entropy
(
input
=
predict_word
,
label
=
next_word
)
avg_cost
=
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
sgd_optimizer
=
SGDOptimizer
(
learning_rate
=
0.001
)
opts
=
sgd_optimizer
.
minimize
(
avg_cost
)
train_reader
=
paddle
.
batch
(
...
...
python/paddle/v2/fluid/tests/test_conv2d_op.py
浏览文件 @
5ee63bb6
...
...
@@ -110,13 +110,30 @@ class TestConv2dOp(OpTest):
self
.
op_type
=
"conv2d"
class
TestWithPad
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithStride
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
input_size
=
[
2
,
3
,
6
,
6
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
class
TestWithGroup
(
TestConv2dOp
):
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWith1x1
(
TestConv2dOp
):
def
init_test_case
(
self
):
...
...
@@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp):
f_c
=
self
.
input_size
[
1
]
/
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
1
,
1
]
def
init_dilation
(
self
):
self
.
dilations
=
[
1
,
1
]
def
init_group
(
self
):
self
.
groups
=
3
def
init_op_type
(
self
):
self
.
op_type
=
"conv2d"
class
TestWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
...
...
@@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp):
def
init_group
(
self
):
self
.
groups
=
3
#----------------Conv2dCudnn----------------
class
TestCudnn
(
TestConv2dOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv
2d
"
self
.
op_type
=
"conv
_cudnn
"
#----------------Conv2dCudnn----------------
class
TestCudnnWithPad
(
TestWithPad
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
class
TestCudnn
(
TestConv2dOp
):
class
TestCudnn
WithStride
(
TestWithStride
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv_cudnn"
...
...
python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
浏览文件 @
5ee63bb6
...
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv2dtranspose_forward_naive
(
input_
,
filter_
,
conv2dtranspose_param
):
# [2, 3, 5, 5]
in_n
,
in_c
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3]
f_c
,
out_c
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
...
...
@@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
j1
,
j2
=
j
*
stride
[
0
],
j
*
stride
[
0
]
+
f_w
out
[
n
,
k
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_h
-
pad
[
0
],
pad
[
1
]:
out_w
-
pad
[
1
]]
return
out
...
...
@@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest):
def
setUp
(
self
):
# init as conv transpose
self
.
init_op_type
()
# [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7]
self
.
init_test_case
()
conv2dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
...
@@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
print
'check output here for'
,
self
.
op_type
self
.
check_output
()
def
test_check_grad_no_input
(
self
):
...
...
@@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest):
self
.
op_type
=
"conv2d_transpose"
class
TestWithPad
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
class
TestWithStride
(
TestConv2dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
dilations
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
]
# ------------ test_cudnn ------------
class
TestCudnn
(
TestConv2dTransposeOp
):
def
init_op_type
(
self
):
...
...
python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
浏览文件 @
5ee63bb6
...
...
@@ -4,9 +4,7 @@ from op_test import OpTest
def
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
):
# [2, 3, 5, 5, 5]
in_n
,
in_c
,
in_d
,
in_h
,
in_w
=
input_
.
shape
# [3, 6, 3, 3, 3]
f_c
,
out_c
,
f_d
,
f_h
,
f_w
=
filter_
.
shape
assert
in_c
==
f_c
...
...
@@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
out_d
=
(
in_d
-
1
)
*
stride
[
0
]
+
f_d
out_h
=
(
in_h
-
1
)
*
stride
[
1
]
+
f_h
out_w
=
(
in_w
-
1
)
*
stride
[
2
]
+
f_w
out
=
np
.
zeros
((
in_n
,
out_c
,
out_d
,
out_h
,
out_w
))
for
n
in
range
(
in_n
):
...
...
@@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
j1
,
j2
=
j
*
stride
[
2
],
j
*
stride
[
2
]
+
f_w
out
[
n
,
k
,
d1
:
d2
,
i1
:
i2
,
j1
:
j2
]
+=
tmp_out
out
=
out
[:,
:,
pad
[
0
]:
out_d
-
pad
[
0
],
pad
[
1
]:
out_h
-
pad
[
1
],
pad
[
2
]:
out_w
-
pad
[
2
]]
return
out
...
...
@@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest):
def
setUp
(
self
):
# init as conv transpose
self
.
init_op_type
()
# [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
self
.
init_test_case
()
conv3dtranspose_param
=
{
'stride'
:
self
.
stride
,
'pad'
:
self
.
pad
}
...
...
@@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest):
filter_
=
np
.
random
.
random
(
self
.
filter_size
).
astype
(
"float32"
)
output
=
conv3dtranspose_forward_naive
(
input_
,
filter_
,
conv3dtranspose_param
).
astype
(
"float32"
)
# print 'deconv output py', output, output.shape
self
.
inputs
=
{
'Input'
:
input_
,
'Filter'
:
filter_
}
self
.
attrs
=
{
...
...
@@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest):
self
.
outputs
=
{
'Output'
:
output
}
def
test_check_output
(
self
):
print
'check output here'
self
.
check_output
()
def
test_check_grad
(
self
):
...
...
@@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest):
self
.
pad
=
[
0
,
0
,
0
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NC
D
HW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
...
...
@@ -93,5 +88,31 @@ class TestConv3dTransposeOp(OpTest):
self
.
op_type
=
"conv3d_transpose"
class
TestWithPad
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
1
,
1
,
1
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
class
TestWithStride
(
TestConv3dTransposeOp
):
def
init_test_case
(
self
):
self
.
pad
=
[
1
,
1
,
1
]
self
.
stride
=
[
2
,
2
,
2
]
self
.
dilations
=
[
1
,
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
,
5
]
# NCDHW
f_c
=
self
.
input_size
[
1
]
self
.
filter_size
=
[
f_c
,
6
,
3
,
3
,
3
]
# ------------ test_cudnn ------------
class
TestCudnn
(
TestConv3dTransposeOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"conv3d_transpose_cudnn"
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_gru_op.py
浏览文件 @
5ee63bb6
...
...
@@ -6,7 +6,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu
class
TestGRUOp
(
OpTest
):
batch_size
=
9
lod
=
[[
0
,
2
,
6
,
9
]]
batch_size
=
lod
[
0
][
-
1
]
frame_size
=
5
activate
=
{
'identity'
:
identity
,
...
...
@@ -35,7 +36,7 @@ class TestGRUOp(OpTest):
seq_starts
[
sorted_seqs
[
i
]]
+
batch_idx
)
idx_in_seq
.
append
(
idx
)
idx_in_seq_list
.
append
(
idx_in_seq
)
return
idx_in_seq_list
return
idx_in_seq_list
,
sorted_seqs
def
gru_step
(
self
,
x
,
h_p
,
w
,
b
):
batch_size
=
x
.
shape
[
0
]
...
...
@@ -66,8 +67,8 @@ class TestGRUOp(OpTest):
batch_hidden
=
self
.
outputs
[
'BatchHidden'
]
hidden
=
self
.
outputs
[
'Hidden'
]
idx_in_seq_list
=
self
.
idx_in_seq_list
h_p
=
self
.
inputs
[
'H0'
]
if
self
.
inputs
.
has_key
(
'H0'
)
else
np
.
zeros
(
(
len
(
idx_in_seq_list
[
0
]),
self
.
frame_size
))
h_p
=
self
.
inputs
[
'H0'
]
[
self
.
sorted_seqs
]
if
self
.
inputs
.
has_key
(
'H0'
)
else
np
.
zeros
(
(
len
(
idx_in_seq_list
[
0
]),
self
.
frame_size
))
num_batch
=
len
(
idx_in_seq_list
)
end_idx
=
0
for
batch_idx
in
range
(
num_batch
):
...
...
@@ -84,8 +85,9 @@ class TestGRUOp(OpTest):
return
batch_gate
,
batch_reset_hidden_prev
,
hidden
def
set_data
(
self
):
lod
=
[[
0
,
2
,
6
,
self
.
batch_size
]]
self
.
idx_in_seq_list
=
self
.
seq_to_batch
(
lod
,
self
.
is_reverse
)
lod
=
self
.
lod
self
.
idx_in_seq_list
,
self
.
sorted_seqs
=
self
.
seq_to_batch
(
lod
,
self
.
is_reverse
)
batch_size
=
self
.
batch_size
frame_size
=
self
.
frame_size
input
=
np
.
random
.
rand
(
batch_size
,
frame_size
*
3
).
astype
(
'float64'
)
...
...
@@ -146,7 +148,7 @@ class TestGRUOpReverse(TestGRUOp):
def
set_confs
(
self
):
self
.
is_reverse
=
True
self
.
attrs
=
{
'activation'
:
'
identity
'
,
'activation'
:
'
tanh
'
,
'gate_activation'
:
'sigmoid'
,
'is_reverse'
:
self
.
is_reverse
}
...
...
python/paddle/v2/f
ramework
/tests/test_is_empty_op.py
→
python/paddle/v2/f
luid
/tests/test_is_empty_op.py
浏览文件 @
5ee63bb6
import
unittest
import
numpy
as
np
from
paddle.v2.f
ramework
.op
import
Operator
import
paddle.v2.f
ramework
.core
as
core
from
paddle.v2.f
luid
.op
import
Operator
import
paddle.v2.f
luid
.core
as
core
def
create_tensor
(
scope
,
name
,
np_data
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录