Commit 81520a24 authored by Yu Yang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/refine_eigen_tensor

@@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
-option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
+option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)

@@ -254,6 +254,12 @@ elseif()
    set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE)
endif()

+if (WITH_PROFILER)
+    find_package(Gperftools REQUIRED)
+    include_directories(${GPERFTOOLS_INCLUDE_DIR})
+    add_definitions(-DWITH_GPERFTOOLS)
+endif()
+
include(generic)  # simplify cmake module
include(package)  # set paddle packages
include(ccache)   # set ccache for compilation
......
# Tries to find Gperftools.
#
# Usage of this module as follows:
#
# find_package(Gperftools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Gperftools_ROOT_DIR Set this variable to the root installation of
# Gperftools if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# GPERFTOOLS_FOUND System has Gperftools libs/headers
# GPERFTOOLS_LIBRARIES The Gperftools libraries (tcmalloc & profiler)
# GPERFTOOLS_INCLUDE_DIR The location of Gperftools headers
find_library(GPERFTOOLS_TCMALLOC
  NAMES tcmalloc
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_library(GPERFTOOLS_PROFILER
  NAMES profiler
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
  NAMES tcmalloc_and_profiler
  HINTS ${Gperftools_ROOT_DIR}/lib)

find_path(GPERFTOOLS_INCLUDE_DIR
  NAMES gperftools/heap-profiler.h
  HINTS ${Gperftools_ROOT_DIR}/include)

set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})

include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(
  Gperftools
  DEFAULT_MSG
  GPERFTOOLS_LIBRARIES
  GPERFTOOLS_INCLUDE_DIR)

mark_as_advanced(
  Gperftools_ROOT_DIR
  GPERFTOOLS_TCMALLOC
  GPERFTOOLS_PROFILER
  GPERFTOOLS_TCMALLOC_AND_PROFILER
  GPERFTOOLS_LIBRARIES
  GPERFTOOLS_INCLUDE_DIR)

# create IMPORTED targets
if (Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc)
  add_library(gperftools::tcmalloc UNKNOWN IMPORTED)
  set_target_properties(gperftools::tcmalloc PROPERTIES
                        IMPORTED_LOCATION ${GPERFTOOLS_TCMALLOC}
                        INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
  add_library(gperftools::profiler UNKNOWN IMPORTED)
  set_target_properties(gperftools::profiler PROPERTIES
                        IMPORTED_LOCATION ${GPERFTOOLS_PROFILER}
                        INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
endif()
@@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME)
  endif()
endfunction(find_fluid_modules)

+function(common_link TARGET_NAME)
+  if (WITH_PROFILER)
+    target_link_libraries(${TARGET_NAME} gperftools::profiler)
+  endif()
+endfunction()
+
# find all third_party modules is used for paddle static library
# for reduce the dependency when building the inference libs.
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)

@@ -274,6 +282,7 @@ function(cc_library TARGET_NAME)
    endif()
    target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
    add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
+    common_link(${TARGET_NAME})
  endif()

  # cpplint code style

@@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME)
  if(cc_binary_DEPS)
    target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
    add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
+    common_link(${TARGET_NAME})
  endif()
endfunction(cc_binary)

@@ -362,6 +372,7 @@ function(cc_test TARGET_NAME)
      target_link_libraries(${TARGET_NAME} ${win32_deps})
    endif(WIN32)
    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+    common_link(${TARGET_NAME})
    add_test(NAME ${TARGET_NAME}
             COMMAND ${TARGET_NAME} ${cc_test_ARGS}
             WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

@@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME)
    if(nv_binary_DEPS)
      target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
      add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
+      common_link(${TARGET_NAME})
    endif()
  endif()
endfunction(nv_binary)

@@ -433,6 +445,7 @@ function(nv_test TARGET_NAME)
    cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+    common_link(${TARGET_NAME})
    add_test(${TARGET_NAME} ${TARGET_NAME})
    if (nv_test_SERIAL)
      set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)

@@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME)
    if(hip_binary_DEPS)
      target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
      add_dependencies(${TARGET_NAME} ${hip_binary_DEPS})
+      common_link(${TARGET_NAME})
    endif()
  endif()
endfunction(hip_binary)

@@ -518,6 +532,7 @@ function(hip_test TARGET_NAME)
    set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
    target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
    add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
+    common_link(${TARGET_NAME})
    add_test(${TARGET_NAME} ${TARGET_NAME})
  endif()
endfunction(hip_test)

@@ -560,6 +575,7 @@ function(go_library TARGET_NAME)
  endif()
  if(go_library_DEPS)
    add_dependencies(${TARGET_NAME} ${go_library_DEPS})
+    common_link(${TARGET_NAME})
  endif(go_library_DEPS)

  # The "source file" of the library is `${dummyfile}` which never
......
@@ -30,13 +30,36 @@ limitations under the License. */
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"

+#ifdef WITH_GPERFTOOLS
+#include "gperftools/profiler.h"
+#endif
+DEFINE_string(pe_profile_fname, "",
+              "Profiler filename for PE, which is generated by gperftools. "
+              "Only valid when compiled `WITH_PROFILER=ON`. Empty if disabled.");
+
namespace paddle {
namespace framework {

+static std::once_flag gProfileOnce;
+#ifdef WITH_GPERFTOOLS
+static bool gProfileStarted = false;
+#endif
class ParallelExecutorPrivate {
 public:
  explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
-      : places_(places) {}
+      : places_(places) {
+    if (!FLAGS_pe_profile_fname.empty()) {
+      std::call_once(gProfileOnce, [] {
+#ifdef WITH_GPERFTOOLS
+        ProfilerStart(FLAGS_pe_profile_fname.c_str());
+        gProfileStarted = true;
+#else
+        LOG(WARNING) << "Paddle is not compiled with gperftools. "
+                        "FLAGS_pe_profile_fname will be ignored";
+#endif
+      });
+    }
+  }

  ~ParallelExecutorPrivate() {
    if (own_local_scope_) {

@@ -270,6 +293,12 @@ void ParallelExecutor::BCastParamsToDevices(

void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                           const std::string &fetched_var_name) {
+#ifdef WITH_GPERFTOOLS
+  if (gProfileStarted) {
+    ProfilerFlush();
+  }
+#endif
+
  platform::RecordBlock b(0);
#ifdef PADDLE_WITH_CUDA
  if (!gcs_.empty()) {
......
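For context, here is a minimal standalone sketch of the gperftools CPU-profiler calls the change above relies on (ProfilerStart, ProfilerFlush, ProfilerStop from gperftools/profiler.h). The output file name "pe.prof" and the RunManyIterations() workload are made up for illustration and are not part of the patch:

// Standalone sketch: how the gperftools CPU profiler is typically driven.
// Build with -DWITH_GPERFTOOLS and link -lprofiler to enable profiling.
#include <cstdio>
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif

void RunManyIterations() {
  // Illustrative CPU-bound workload so the profiler has something to sample.
  volatile double acc = 0;
  for (int i = 0; i < 100000000; ++i) acc = acc + i * 0.5;
}

int main() {
#ifdef WITH_GPERFTOOLS
  ProfilerStart("pe.prof");  // start sampling; samples are written to pe.prof
#endif
  RunManyIterations();       // the code being profiled
#ifdef WITH_GPERFTOOLS
  ProfilerFlush();           // push buffered samples to the file
  ProfilerStop();            // stop sampling and close the file
#endif
  std::printf("done\n");
  return 0;
}

In the patch itself, ProfilerStart is called once from the ParallelExecutorPrivate constructor when FLAGS_pe_profile_fname is set, and each Run() only calls ProfilerFlush(); the resulting profile file can then be inspected offline, for example with gperftools' pprof tool.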
@@ -79,16 +79,16 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
  }
};

-#define INTRIAVX_FLOAT(isa, block) \
+#define INTRIAVX_FLOAT(isa, jit_block) \
  template <> \
-  LayerNormKernelImpl<float, isa, block>::LayerNormKernelImpl(int right) \
+  LayerNormKernelImpl<float, isa, jit_block>::LayerNormKernelImpl(int right) \
      : LayerNormKernel<float>() { \
    this->num_ = right; \
    this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \
    this->end_ = this->num_ - this->rest_; \
  } \
  template <> \
-  void LayerNormKernelImpl<float, platform::avx, block>::Compute( \
+  void LayerNormKernelImpl<float, isa, jit_block>::Compute( \
      float* x, float* out, float* mean, float* var, const float* scale, \
      const float* bias, int height, const float epsilon) const { \
    __m256 sum; \

@@ -97,6 +97,7 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
    __m256 tmp; \
    size_t offset; \
    size_t j; \
+    size_t block = YMM_FLOAT_BLOCK; \
    __m256 reverse_num_vec = \
        _mm256_div_ps(_mm256_set1_ps(1.0), _mm256_set1_ps(this->num_)); \
    __m256 epsilon_vec = _mm256_set1_ps(epsilon); \

@@ -221,12 +222,14 @@ INTRIAVX_FLOAT(platform::avx, kEQ8);
INTRIAVX_FLOAT(platform::avx, kGT8LT16);
INTRIAVX_FLOAT(platform::avx, kEQ16);
INTRIAVX_FLOAT(platform::avx, kGT16);
-#endif
-#ifdef __AVX2__
INTRIAVX_FLOAT(platform::avx2, kEQ8);
INTRIAVX_FLOAT(platform::avx2, kGT8LT16);
INTRIAVX_FLOAT(platform::avx2, kEQ16);
INTRIAVX_FLOAT(platform::avx2, kGT16);
+INTRIAVX_FLOAT(platform::avx512f, kEQ8);
+INTRIAVX_FLOAT(platform::avx512f, kGT8LT16);
+INTRIAVX_FLOAT(platform::avx512f, kEQ16);
+INTRIAVX_FLOAT(platform::avx512f, kGT16);
#endif

#undef INTRIAVX_FLOAT
......
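As a standalone illustration (simplified names, not Paddle code) of why the macro now has to forward its isa parameter: when the expansion hard-codes one ISA tag, every expansion specializes the same template and a second expansion is a redefinition; forwarding the parameter keeps each specialization distinct, which is what lets the avx2 and avx512f instantiations above coexist.

// Simplified stand-in for LayerNormKernelImpl and the INTRIAVX_FLOAT macro.
enum class Isa { avx, avx2, avx512f };

template <Isa isa, int block>
struct Kernel {
  void Compute();  // defined only via the explicit specializations below
};

// Correct: the macro forwards its isa argument, so each expansion
// specializes a different Kernel<isa, block>.
#define SPECIALIZE(isa_arg, block_arg) \
  template <>                          \
  void Kernel<isa_arg, block_arg>::Compute() {}

SPECIALIZE(Isa::avx, 8)
SPECIALIZE(Isa::avx2, 8)
SPECIALIZE(Isa::avx512f, 8)

// Broken variant (what hard-coding platform::avx in Compute effectively did):
// expanding it for a second ISA would redefine Kernel<Isa::avx, 8>::Compute
// and fail to compile, so it is deliberately left unused here.
#define BAD_SPECIALIZE(isa_arg, block_arg) \
  template <>                              \
  void Kernel<Isa::avx, block_arg>::Compute() {}

int main() {
  Kernel<Isa::avx512f, 8>{}.Compute();
  return 0;
}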
@@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
    for (size_t i = 0; i < outs_rows_idx.size(); ++i) {
      auto rows_idx = outs_rows_idx[i];
      outs[i]->set_height(height_sections[i]);
+      auto dims = x->GetCompleteDims();
+      dims[0] = rows_idx.size();
+      outs[i]->mutable_value()->mutable_data<T>(dims, x->place());
+      outs[i]->mutable_rows()->clear();
      if (rows_idx.size() > 0) {
-        auto dims = x->GetCompleteDims();
-        dims[0] = rows_idx.size();
-        outs[i]->mutable_value()->mutable_data<T>(dims, x->place());
        for (auto idx : rows_idx) {
          outs[i]->mutable_rows()->push_back(idx - abs_sections[i]);
        }

@@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
          }
        }
      }
+      PADDLE_ENFORCE_EQ(rows_idx.size(), outs[i]->rows().size(),
+                        "rows should have the same size as tensor dim 0");
    }
  }
};
......
@@ -62,45 +62,54 @@ inline std::string demangle(std::string name) { return name; }
#endif

struct EnforceNotMet : public std::exception {
-  std::exception_ptr exp_;
  std::string err_str_;
-  EnforceNotMet(std::exception_ptr e, const char* f, int l) : exp_(e) {
-    static constexpr int TRACE_STACK_LIMIT = 100;
+
+  EnforceNotMet(std::exception_ptr e, const char* f, int l) {
    try {
-      std::rethrow_exception(exp_);
-    } catch (const std::exception& exp) {
-      std::ostringstream sout;
-
-      sout << string::Sprintf("%s at [%s:%d]", exp.what(), f, l) << std::endl;
-      sout << "PaddlePaddle Call Stacks: " << std::endl;
+      std::rethrow_exception(e);
+    } catch (std::exception& e) {
+      Init(e.what(), f, l);
+    }
+  }
+
+  template <typename... ARGS>
+  EnforceNotMet(const char* f, int l, ARGS... args) {
+    Init(string::Sprintf(args...), f, l);
+  }
+
+  const char* what() const noexcept override { return err_str_.c_str(); }
+
+ private:
+  template <typename StrType>
+  inline void Init(StrType what, const char* f, int l) {
+    static constexpr int TRACE_STACK_LIMIT = 100;
+    std::ostringstream sout;
+
+    sout << string::Sprintf("%s at [%s:%d]", what, f, l) << std::endl;
+    sout << "PaddlePaddle Call Stacks: " << std::endl;
#if !defined(_WIN32)
    void* call_stack[TRACE_STACK_LIMIT];
    auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
    auto symbols = backtrace_symbols(call_stack, size);
    Dl_info info;
    for (int i = 0; i < size; ++i) {
      if (dladdr(call_stack[i], &info) && info.dli_sname) {
        auto demangled = demangle(info.dli_sname);
        auto addr_offset = static_cast<char*>(call_stack[i]) -
                           static_cast<char*>(info.dli_saddr);
        sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
                                2 + sizeof(void*) * 2, call_stack[i], demangled,
                                addr_offset);
      } else {
        sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
                                call_stack[i]);
      }
    }
    free(symbols);
#else
    sout << "Windows not support stack backtrace yet.";
#endif
    err_str_ = sout.str();
  }
-
-  const char* what() const noexcept { return err_str_.c_str(); }
};

struct EOFException : public std::exception {

@@ -242,13 +251,8 @@ inline void throw_on_error(T e) {
  throw_on_error(e, "");
}

-#define PADDLE_THROW(...)                                              \
-  do {                                                                 \
-    throw ::paddle::platform::EnforceNotMet(                           \
-        std::make_exception_ptr(                                       \
-            std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
-        __FILE__, __LINE__);                                           \
-  } while (false)
+#define PADDLE_THROW(...) \
+  throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)

#ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \
......
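The net effect of the enforce.h change is that PADDLE_THROW now builds the exception directly from the format arguments instead of first wrapping a std::runtime_error in a std::exception_ptr and rethrowing it. Below is a self-contained sketch of that construction pattern; it is a simplification, not Paddle's implementation: a stream-based Format stands in for paddle::string::Sprintf, and NotMet/MY_THROW are made-up names.

#include <exception>
#include <sstream>
#include <string>

struct NotMet : public std::exception {
  std::string err_str_;

  // Variadic constructor: the message is formatted immediately and the
  // file/line suffix is appended, mirroring the Init() helper in the diff.
  template <typename... ARGS>
  NotMet(const char* f, int l, ARGS... args) {
    std::ostringstream sout;
    Format(sout, args...);
    sout << " at [" << f << ":" << l << "]";
    err_str_ = sout.str();
  }

  const char* what() const noexcept override { return err_str_.c_str(); }

 private:
  static void Format(std::ostringstream&) {}
  template <typename T, typename... Rest>
  static void Format(std::ostringstream& sout, const T& v, const Rest&... rest) {
    sout << v;
    Format(sout, rest...);
  }
};

// Single-statement throw, like the new PADDLE_THROW.
#define MY_THROW(...) throw NotMet(__FILE__, __LINE__, __VA_ARGS__)

int main() {
  try {
    MY_THROW("tensor rank mismatch: ", 3, " vs ", 4);
  } catch (const NotMet& e) {
    // e.what() reads: tensor rank mismatch: 3 vs 4 at [<file>:<line>]
    return 0;
  }
  return 1;
}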
@@ -336,6 +336,8 @@ PYBIND11_MODULE(core, m) {
      .def("get_tensor",
           [](SelectedRows &self) { return self.mutable_value(); },
           py::return_value_policy::reference)
+      .def("numel",
+           [](SelectedRows &self) -> int64_t { return self.value().numel(); })
      .def("set_height", &SelectedRows::set_height)
      .def("height", &SelectedRows::height)
      .def("set_rows",
......
@@ -127,7 +127,8 @@ def __bootstrap__():
        'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem',
        'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
        'eager_delete_tensor_gb', 'allocator_strategy',
-        'reader_queue_speed_test_mode', 'print_sub_graph_dir'
+        'reader_queue_speed_test_mode', 'print_sub_graph_dir',
+        'pe_profile_fname'
    ]
    if 'Darwin' not in sysstr:
        read_env_flags.append('use_pinned_memory')
......
@@ -48,6 +48,7 @@ class WeightedAverage(object):
    Examples:
      .. code-block:: python
+
        avg = fluid.average.WeightedAverage()
        avg.add(value=2.0, weight=1)
        avg.add(value=4.0, weight=2)
......
@@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
        # expected output selected rows
        expected_out0_rows = [0, 4]
        expected_out1_rows = [0, 2]
+        expected_out2_rows = []
        expected_out4_rows = [0]

        op = Operator(

@@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
        self.assertEqual(outs[0].rows(), expected_out0_rows)
        self.assertEqual(outs[1].rows(), expected_out1_rows)
+        self.assertEqual(outs[2].rows(), expected_out2_rows)
        self.assertEqual(outs[4].rows(), expected_out4_rows)

        self.assertEqual(outs[0].height(), height_sections[0])

@@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase):
        self.assertAlmostEqual(4.0, np.array(outs[1].get_tensor())[1, 1])
        self.assertAlmostEqual(8.0, np.array(outs[4].get_tensor())[0, 1])

+        self.assertEqual(outs[2].numel(), 0)
+        self.assertEqual(outs[3].numel(), 0)
+
    def check_grad_with_place(self, place):
        scope = core.Scope()
        height = 10
......