Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
81520a24
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
81520a24
编写于
12月 12, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into feature/refine_eigen_tensor
上级
9bd70a1e
8175983e
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
182 addition
and
49 deletion
+182
-49
CMakeLists.txt
CMakeLists.txt
+7
-1
cmake/FindGperftools.cmake
cmake/FindGperftools.cmake
+63
-0
cmake/generic.cmake
cmake/generic.cmake
+16
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+30
-1
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
+8
-5
paddle/fluid/operators/split_selected_rows_op.h
paddle/fluid/operators/split_selected_rows_op.h
+6
-3
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+42
-38
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+2
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
python/paddle/fluid/average.py
python/paddle/fluid/average.py
+1
-0
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
...ddle/fluid/tests/unittests/test_split_selected_rows_op.py
+5
-0
未找到文件。
CMakeLists.txt
浏览文件 @
81520a24
...
@@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
...
@@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option
(
WITH_DOUBLE
"Compile PaddlePaddle with double precision"
OFF
)
option
(
WITH_DOUBLE
"Compile PaddlePaddle with double precision"
OFF
)
option
(
WITH_RDMA
"Compile PaddlePaddle with RDMA support"
OFF
)
option
(
WITH_RDMA
"Compile PaddlePaddle with RDMA support"
OFF
)
option
(
WITH_TIMER
"Compile PaddlePaddle with stats timer"
OFF
)
option
(
WITH_TIMER
"Compile PaddlePaddle with stats timer"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler
and gperftools
"
OFF
)
option
(
WITH_DOC
"Compile PaddlePaddle with documentation"
OFF
)
option
(
WITH_DOC
"Compile PaddlePaddle with documentation"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
...
@@ -254,6 +254,12 @@ elseif()
...
@@ -254,6 +254,12 @@ elseif()
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in MKL only now."
FORCE
)
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in MKL only now."
FORCE
)
endif
()
endif
()
if
(
WITH_PROFILER
)
find_package
(
Gperftools REQUIRED
)
include_directories
(
${
GPERFTOOLS_INCLUDE_DIR
}
)
add_definitions
(
-DWITH_GPERFTOOLS
)
endif
()
include
(
generic
)
# simplify cmake module
include
(
generic
)
# simplify cmake module
include
(
package
)
# set paddle packages
include
(
package
)
# set paddle packages
include
(
ccache
)
# set ccache for compilation
include
(
ccache
)
# set ccache for compilation
...
...
cmake/FindGperftools.cmake
0 → 100644
浏览文件 @
81520a24
# Tries to find Gperftools.
#
# Usage of this module as follows:
#
#     find_package(Gperftools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
#  Gperftools_ROOT_DIR  Set this variable to the root installation of
#                       Gperftools if the module has problems finding
#                       the proper installation path.
#
# Variables defined by this module:
#
#  GPERFTOOLS_FOUND        System has Gperftools libs/headers
#  GPERFTOOLS_LIBRARIES    The combined tcmalloc_and_profiler library
#  GPERFTOOLS_INCLUDE_DIR  The location of Gperftools headers
#
# Imported targets defined by this module (each one is created only when
# the package was found AND its own library was located):
#
#  gperftools::tcmalloc    The tcmalloc library
#  gperftools::profiler    The profiler library

find_library(GPERFTOOLS_TCMALLOC
  NAMES tcmalloc
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_library(GPERFTOOLS_PROFILER
  NAMES profiler
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
  NAMES tcmalloc_and_profiler
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_path(GPERFTOOLS_INCLUDE_DIR
  NAMES gperftools/heap-profiler.h
  HINTS ${Gperftools_ROOT_DIR}/include)

set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})

# Only the combined library and the headers decide whether the package counts
# as found; the stand-alone tcmalloc / profiler libraries are optional here.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  Gperftools
  DEFAULT_MSG
  GPERFTOOLS_LIBRARIES
  GPERFTOOLS_INCLUDE_DIR)

mark_as_advanced(
  Gperftools_ROOT_DIR
  GPERFTOOLS_TCMALLOC
  GPERFTOOLS_PROFILER
  GPERFTOOLS_TCMALLOC_AND_PROFILER
  GPERFTOOLS_LIBRARIES
  GPERFTOOLS_INCLUDE_DIR)

# create IMPORTED targets
#
# Each target is guarded on its own library variable and its own
# `NOT TARGET` check: Gperftools_FOUND does not imply that the stand-alone
# libraries were located, and an imported target must never be given a
# `<VAR>-NOTFOUND` value as its IMPORTED_LOCATION.
if(Gperftools_FOUND)
  if(GPERFTOOLS_TCMALLOC AND NOT TARGET gperftools::tcmalloc)
    add_library(gperftools::tcmalloc UNKNOWN IMPORTED)
    set_target_properties(gperftools::tcmalloc PROPERTIES
      IMPORTED_LOCATION "${GPERFTOOLS_TCMALLOC}"
      INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
  endif()

  if(GPERFTOOLS_PROFILER AND NOT TARGET gperftools::profiler)
    add_library(gperftools::profiler UNKNOWN IMPORTED)
    set_target_properties(gperftools::profiler PROPERTIES
      IMPORTED_LOCATION "${GPERFTOOLS_PROFILER}"
      INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
  endif()
endif()
cmake/generic.cmake
浏览文件 @
81520a24
...
@@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME)
...
@@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME)
endif
()
endif
()
endfunction
(
find_fluid_modules
)
endfunction
(
find_fluid_modules
)
# common_link(TARGET_NAME)
#
# Adds the link dependencies that are applied uniformly by the target helper
# functions in this file. At present that is a single library,
# gperftools::profiler, and it is linked only when WITH_PROFILER is enabled.
#
#   TARGET_NAME  name of an already-created target to link against.
#
# The keyword-less target_link_libraries signature is kept on purpose: the
# helpers that call this function use the plain signature on the same target,
# and CMake does not allow mixing plain and keyword signatures.
function(common_link TARGET_NAME)
  if(NOT WITH_PROFILER)
    return()
  endif()
  target_link_libraries(${TARGET_NAME} gperftools::profiler)
endfunction()
# find all third_party modules is used for paddle static library
# find all third_party modules is used for paddle static library
# for reduce the dependency when building the inference libs.
# for reduce the dependency when building the inference libs.
set_property
(
GLOBAL PROPERTY FLUID_THIRD_PARTY
)
set_property
(
GLOBAL PROPERTY FLUID_THIRD_PARTY
)
...
@@ -274,6 +282,7 @@ function(cc_library TARGET_NAME)
...
@@ -274,6 +282,7 @@ function(cc_library TARGET_NAME)
endif
()
endif
()
target_link_libraries
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
# cpplint code style
# cpplint code style
...
@@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME)
...
@@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME)
if
(
cc_binary_DEPS
)
if
(
cc_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
endfunction
(
cc_binary
)
endfunction
(
cc_binary
)
...
@@ -362,6 +372,7 @@ function(cc_test TARGET_NAME)
...
@@ -362,6 +372,7 @@ function(cc_test TARGET_NAME)
target_link_libraries
(
${
TARGET_NAME
}
${
win32_deps
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
win32_deps
}
)
endif
(
WIN32
)
endif
(
WIN32
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
...
@@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME)
...
@@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME)
if
(
nv_binary_DEPS
)
if
(
nv_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
endif
()
endif
()
endfunction
(
nv_binary
)
endfunction
(
nv_binary
)
...
@@ -433,6 +445,7 @@ function(nv_test TARGET_NAME)
...
@@ -433,6 +445,7 @@ function(nv_test TARGET_NAME)
cuda_add_executable
(
${
TARGET_NAME
}
${
nv_test_SRCS
}
)
cuda_add_executable
(
${
TARGET_NAME
}
${
nv_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
if
(
nv_test_SERIAL
)
if
(
nv_test_SERIAL
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY RUN_SERIAL 1
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY RUN_SERIAL 1
)
...
@@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME)
...
@@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME)
if
(
hip_binary_DEPS
)
if
(
hip_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
endif
()
endif
()
endfunction
(
hip_binary
)
endfunction
(
hip_binary
)
...
@@ -518,6 +532,7 @@ function(hip_test TARGET_NAME)
...
@@ -518,6 +532,7 @@ function(hip_test TARGET_NAME)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES LINKER_LANGUAGE HIP
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES LINKER_LANGUAGE HIP
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
endif
()
endif
()
endfunction
(
hip_test
)
endfunction
(
hip_test
)
...
@@ -560,6 +575,7 @@ function(go_library TARGET_NAME)
...
@@ -560,6 +575,7 @@ function(go_library TARGET_NAME)
endif
()
endif
()
if
(
go_library_DEPS
)
if
(
go_library_DEPS
)
add_dependencies
(
${
TARGET_NAME
}
${
go_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
go_library_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
(
go_library_DEPS
)
endif
(
go_library_DEPS
)
# The "source file" of the library is `${dummyfile}` which never
# The "source file" of the library is `${dummyfile}` which never
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
81520a24
...
@@ -30,13 +30,36 @@ limitations under the License. */
...
@@ -30,13 +30,36 @@ limitations under the License. */
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
// Flag holding the output filename handed to gperftools' ProfilerStart() the
// first time a ParallelExecutor is constructed. An empty value (the default)
// leaves profiling off.
DEFINE_string(pe_profile_fname, "",
              "Profiler filename for PE, which generated by gperftools. "
              "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
static
std
::
once_flag
gProfileOnce
;
#ifdef WITH_GPERFTOOLS
static
bool
gProfileStarted
=
false
;
#endif
class
ParallelExecutorPrivate
{
class
ParallelExecutorPrivate
{
public:
public:
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
:
places_
(
places
)
{}
:
places_
(
places
)
{
if
(
!
FLAGS_pe_profile_fname
.
empty
())
{
std
::
call_once
(
gProfileOnce
,
[]
{
#ifdef WITH_GPERFTOOLS
ProfilerStart
(
FLAGS_pe_profile_fname
.
c_str
());
gProfileStarted
=
true
;
#else
LOG
(
WARNING
)
<<
"Paddle is not compiled with gperftools. "
"FLAGS_pe_profile_fname will be ignored"
;
#endif
});
}
}
~
ParallelExecutorPrivate
()
{
~
ParallelExecutorPrivate
()
{
if
(
own_local_scope_
)
{
if
(
own_local_scope_
)
{
...
@@ -270,6 +293,12 @@ void ParallelExecutor::BCastParamsToDevices(
...
@@ -270,6 +293,12 @@ void ParallelExecutor::BCastParamsToDevices(
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
const
std
::
string
&
fetched_var_name
)
{
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
}
#endif
platform
::
RecordBlock
b
(
0
);
platform
::
RecordBlock
b
(
0
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
!
gcs_
.
empty
())
{
if
(
!
gcs_
.
empty
())
{
...
...
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
浏览文件 @
81520a24
...
@@ -79,16 +79,16 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
...
@@ -79,16 +79,16 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
}
}
};
};
#define INTRIAVX_FLOAT(isa,
block)
\
#define INTRIAVX_FLOAT(isa,
jit_block)
\
template <> \
template <> \
LayerNormKernelImpl<float, isa,
block>::LayerNormKernelImpl(int right)
\
LayerNormKernelImpl<float, isa,
jit_block>::LayerNormKernelImpl(int right)
\
: LayerNormKernel<float>() { \
: LayerNormKernel<float>() { \
this->num_ = right; \
this->num_ = right; \
this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \
this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \
this->end_ = this->num_ - this->rest_; \
this->end_ = this->num_ - this->rest_; \
} \
} \
template <> \
template <> \
void LayerNormKernelImpl<float,
platform::avx, block>::Compute(
\
void LayerNormKernelImpl<float,
isa, jit_block>::Compute(
\
float* x, float* out, float* mean, float* var, const float* scale, \
float* x, float* out, float* mean, float* var, const float* scale, \
const float* bias, int height, const float epsilon) const { \
const float* bias, int height, const float epsilon) const { \
__m256 sum; \
__m256 sum; \
...
@@ -97,6 +97,7 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
...
@@ -97,6 +97,7 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
__m256 tmp; \
__m256 tmp; \
size_t offset; \
size_t offset; \
size_t j; \
size_t j; \
size_t block = YMM_FLOAT_BLOCK; \
__m256 reverse_num_vec = \
__m256 reverse_num_vec = \
_mm256_div_ps(_mm256_set1_ps(1.0), _mm256_set1_ps(this->num_)); \
_mm256_div_ps(_mm256_set1_ps(1.0), _mm256_set1_ps(this->num_)); \
__m256 epsilon_vec = _mm256_set1_ps(epsilon); \
__m256 epsilon_vec = _mm256_set1_ps(epsilon); \
...
@@ -221,12 +222,14 @@ INTRIAVX_FLOAT(platform::avx, kEQ8);
...
@@ -221,12 +222,14 @@ INTRIAVX_FLOAT(platform::avx, kEQ8);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT16
);
#endif
#ifdef __AVX2__
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kGT16
);
#endif
#endif
#undef INTRIAVX_FLOAT
#undef INTRIAVX_FLOAT
...
...
paddle/fluid/operators/split_selected_rows_op.h
浏览文件 @
81520a24
...
@@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
...
@@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
auto
rows_idx
=
outs_rows_idx
[
i
];
auto
rows_idx
=
outs_rows_idx
[
i
];
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
if
(
rows_idx
.
size
()
>
0
)
{
auto
dims
=
x
->
GetCompleteDims
();
auto
dims
=
x
->
GetCompleteDims
();
dims
[
0
]
=
rows_idx
.
size
();
dims
[
0
]
=
rows_idx
.
size
();
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
x
->
place
());
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
x
->
place
());
outs
[
i
]
->
mutable_rows
()
->
clear
();
if
(
rows_idx
.
size
()
>
0
)
{
for
(
auto
idx
:
rows_idx
)
{
for
(
auto
idx
:
rows_idx
)
{
outs
[
i
]
->
mutable_rows
()
->
push_back
(
idx
-
abs_sections
[
i
]);
outs
[
i
]
->
mutable_rows
()
->
push_back
(
idx
-
abs_sections
[
i
]);
}
}
...
@@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
...
@@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
}
}
}
}
}
}
PADDLE_ENFORCE_EQ
(
rows_idx
.
size
(),
outs
[
i
]
->
rows
().
size
(),
"rows should has the same size with tensor dim 0"
);
}
}
}
}
};
};
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
81520a24
...
@@ -62,22 +62,34 @@ inline std::string demangle(std::string name) { return name; }
...
@@ -62,22 +62,34 @@ inline std::string demangle(std::string name) { return name; }
#endif
#endif
struct
EnforceNotMet
:
public
std
::
exception
{
struct
EnforceNotMet
:
public
std
::
exception
{
std
::
exception_ptr
exp_
;
std
::
string
err_str_
;
std
::
string
err_str_
;
EnforceNotMet
(
std
::
exception_ptr
e
,
const
char
*
f
,
int
l
)
:
exp_
(
e
)
{
EnforceNotMet
(
std
::
exception_ptr
e
,
const
char
*
f
,
int
l
)
{
static
constexpr
int
TRACE_STACK_LIMIT
=
100
;
try
{
try
{
std
::
rethrow_exception
(
exp_
);
std
::
rethrow_exception
(
e
);
}
catch
(
const
std
::
exception
&
exp
)
{
}
catch
(
std
::
exception
&
e
)
{
Init
(
e
.
what
(),
f
,
l
);
}
}
template
<
typename
...
ARGS
>
EnforceNotMet
(
const
char
*
f
,
int
l
,
ARGS
...
args
)
{
Init
(
string
::
Sprintf
(
args
...),
f
,
l
);
}
const
char
*
what
()
const
noexcept
override
{
return
err_str_
.
c_str
();
}
private:
template
<
typename
StrType
>
inline
void
Init
(
StrType
what
,
const
char
*
f
,
int
l
)
{
static
constexpr
int
TRACE_STACK_LIMIT
=
100
;
std
::
ostringstream
sout
;
std
::
ostringstream
sout
;
sout
<<
string
::
Sprintf
(
"%s at [%s:%d]"
,
exp
.
what
()
,
f
,
l
)
<<
std
::
endl
;
sout
<<
string
::
Sprintf
(
"%s at [%s:%d]"
,
what
,
f
,
l
)
<<
std
::
endl
;
sout
<<
"PaddlePaddle Call Stacks: "
<<
std
::
endl
;
sout
<<
"PaddlePaddle Call Stacks: "
<<
std
::
endl
;
#if !defined(_WIN32)
#if !defined(_WIN32)
void
*
call_stack
[
TRACE_STACK_LIMIT
];
void
*
call_stack
[
TRACE_STACK_LIMIT
];
auto
size
=
backtrace
(
call_stack
,
TRACE_STACK_LIMIT
);
auto
size
=
backtrace
(
call_stack
,
TRACE_STACK_LIMIT
);
auto
symbols
=
backtrace_symbols
(
call_stack
,
size
);
auto
symbols
=
backtrace_symbols
(
call_stack
,
size
);
Dl_info
info
;
Dl_info
info
;
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
if
(
dladdr
(
call_stack
[
i
],
&
info
)
&&
info
.
dli_sname
)
{
if
(
dladdr
(
call_stack
[
i
],
&
info
)
&&
info
.
dli_sname
)
{
...
@@ -85,8 +97,8 @@ struct EnforceNotMet : public std::exception {
...
@@ -85,8 +97,8 @@ struct EnforceNotMet : public std::exception {
auto
addr_offset
=
static_cast
<
char
*>
(
call_stack
[
i
])
-
auto
addr_offset
=
static_cast
<
char
*>
(
call_stack
[
i
])
-
static_cast
<
char
*>
(
info
.
dli_saddr
);
static_cast
<
char
*>
(
info
.
dli_saddr
);
sout
<<
string
::
Sprintf
(
"%-3d %*0p %s + %zd
\n
"
,
i
,
sout
<<
string
::
Sprintf
(
"%-3d %*0p %s + %zd
\n
"
,
i
,
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
]
,
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
],
demangled
,
demangled
,
addr_offset
);
addr_offset
);
}
else
{
}
else
{
sout
<<
string
::
Sprintf
(
"%-3d %*0p
\n
"
,
i
,
2
+
sizeof
(
void
*
)
*
2
,
sout
<<
string
::
Sprintf
(
"%-3d %*0p
\n
"
,
i
,
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
]);
call_stack
[
i
]);
...
@@ -98,9 +110,6 @@ struct EnforceNotMet : public std::exception {
...
@@ -98,9 +110,6 @@ struct EnforceNotMet : public std::exception {
#endif
#endif
err_str_
=
sout
.
str
();
err_str_
=
sout
.
str
();
}
}
}
const
char
*
what
()
const
noexcept
{
return
err_str_
.
c_str
();
}
};
};
struct
EOFException
:
public
std
::
exception
{
struct
EOFException
:
public
std
::
exception
{
...
@@ -243,12 +252,7 @@ inline void throw_on_error(T e) {
...
@@ -243,12 +252,7 @@ inline void throw_on_error(T e) {
}
}
#define PADDLE_THROW(...) \
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \
std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \
} while (false)
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \
#define PADDLE_ENFORCE(...) \
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
81520a24
...
@@ -336,6 +336,8 @@ PYBIND11_MODULE(core, m) {
...
@@ -336,6 +336,8 @@ PYBIND11_MODULE(core, m) {
.
def
(
"get_tensor"
,
.
def
(
"get_tensor"
,
[](
SelectedRows
&
self
)
{
return
self
.
mutable_value
();
},
[](
SelectedRows
&
self
)
{
return
self
.
mutable_value
();
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"numel"
,
[](
SelectedRows
&
self
)
->
int64_t
{
return
self
.
value
().
numel
();
})
.
def
(
"set_height"
,
&
SelectedRows
::
set_height
)
.
def
(
"set_height"
,
&
SelectedRows
::
set_height
)
.
def
(
"height"
,
&
SelectedRows
::
height
)
.
def
(
"height"
,
&
SelectedRows
::
height
)
.
def
(
"set_rows"
,
.
def
(
"set_rows"
,
...
...
python/paddle/fluid/__init__.py
浏览文件 @
81520a24
...
@@ -127,7 +127,8 @@ def __bootstrap__():
...
@@ -127,7 +127,8 @@ def __bootstrap__():
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'allocator_strategy'
,
'eager_delete_tensor_gb'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
]
]
if
'Darwin'
not
in
sysstr
:
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
read_env_flags
.
append
(
'use_pinned_memory'
)
...
...
python/paddle/fluid/average.py
浏览文件 @
81520a24
...
@@ -48,6 +48,7 @@ class WeightedAverage(object):
...
@@ -48,6 +48,7 @@ class WeightedAverage(object):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
avg = fluid.average.WeightedAverage()
avg = fluid.average.WeightedAverage()
avg.add(value=2.0, weight=1)
avg.add(value=2.0, weight=1)
avg.add(value=4.0, weight=2)
avg.add(value=4.0, weight=2)
...
...
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
浏览文件 @
81520a24
...
@@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
# expected output selected rows
# expected output selected rows
expected_out0_rows
=
[
0
,
4
]
expected_out0_rows
=
[
0
,
4
]
expected_out1_rows
=
[
0
,
2
]
expected_out1_rows
=
[
0
,
2
]
expected_out2_rows
=
[]
expected_out4_rows
=
[
0
]
expected_out4_rows
=
[
0
]
op
=
Operator
(
op
=
Operator
(
...
@@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
self
.
assertEqual
(
outs
[
0
].
rows
(),
expected_out0_rows
)
self
.
assertEqual
(
outs
[
0
].
rows
(),
expected_out0_rows
)
self
.
assertEqual
(
outs
[
1
].
rows
(),
expected_out1_rows
)
self
.
assertEqual
(
outs
[
1
].
rows
(),
expected_out1_rows
)
self
.
assertEqual
(
outs
[
2
].
rows
(),
expected_out2_rows
)
self
.
assertEqual
(
outs
[
4
].
rows
(),
expected_out4_rows
)
self
.
assertEqual
(
outs
[
4
].
rows
(),
expected_out4_rows
)
self
.
assertEqual
(
outs
[
0
].
height
(),
height_sections
[
0
])
self
.
assertEqual
(
outs
[
0
].
height
(),
height_sections
[
0
])
...
@@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase):
...
@@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase):
self
.
assertAlmostEqual
(
4.0
,
np
.
array
(
outs
[
1
].
get_tensor
())[
1
,
1
])
self
.
assertAlmostEqual
(
4.0
,
np
.
array
(
outs
[
1
].
get_tensor
())[
1
,
1
])
self
.
assertAlmostEqual
(
8.0
,
np
.
array
(
outs
[
4
].
get_tensor
())[
0
,
1
])
self
.
assertAlmostEqual
(
8.0
,
np
.
array
(
outs
[
4
].
get_tensor
())[
0
,
1
])
self
.
assertEqual
(
outs
[
2
].
numel
(),
0
)
self
.
assertEqual
(
outs
[
3
].
numel
(),
0
)
def
check_grad_with_place
(
self
,
place
):
def
check_grad_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
height
=
10
height
=
10
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录