提交 b22d638d 作者: Yu Yang

Speed up SizeOfType

test=develop
上级 f0c0bf32
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include <stdint.h> #include <stdint.h>
#include <map>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -23,10 +24,10 @@ namespace paddle { ...@@ -23,10 +24,10 @@ namespace paddle {
namespace framework { namespace framework {
struct DataTypeMap { struct DataTypeMap {
std::unordered_map<std::type_index, proto::VarType::Type> cpp_to_proto_; std::map<const char*, proto::VarType::Type> cpp_to_proto_;
std::unordered_map<int, std::type_index> proto_to_cpp_; std::unordered_map<int, std::type_index> proto_to_cpp_;
std::unordered_map<int, std::string> proto_to_str_; std::unordered_map<int, std::string> proto_to_str_;
std::unordered_map<std::type_index, size_t> cpp_to_size_; std::map<const char* /*name pointer*/, size_t> cpp_to_size_;
}; };
static DataTypeMap* InitDataTypeMap(); static DataTypeMap* InitDataTypeMap();
...@@ -43,9 +44,9 @@ static inline void RegisterType(DataTypeMap* map, ...@@ -43,9 +44,9 @@ static inline void RegisterType(DataTypeMap* map,
proto::VarType::Type proto_type, proto::VarType::Type proto_type,
const std::string& name) { const std::string& name) {
map->proto_to_cpp_.emplace(static_cast<int>(proto_type), typeid(T)); map->proto_to_cpp_.emplace(static_cast<int>(proto_type), typeid(T));
map->cpp_to_proto_.emplace(typeid(T), proto_type); map->cpp_to_proto_.emplace(typeid(T).name(), proto_type);
map->proto_to_str_.emplace(static_cast<int>(proto_type), name); map->proto_to_str_.emplace(static_cast<int>(proto_type), name);
map->cpp_to_size_.emplace(typeid(T), sizeof(T)); map->cpp_to_size_.emplace(typeid(T).name(), sizeof(T));
} }
static DataTypeMap* InitDataTypeMap() { static DataTypeMap* InitDataTypeMap() {
...@@ -71,7 +72,7 @@ static DataTypeMap* InitDataTypeMap() { ...@@ -71,7 +72,7 @@ static DataTypeMap* InitDataTypeMap() {
} }
proto::VarType::Type ToDataType(std::type_index type) { proto::VarType::Type ToDataType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_proto_.find(type); auto it = gDataTypeMap().cpp_to_proto_.find(type.name());
if (it != gDataTypeMap().cpp_to_proto_.end()) { if (it != gDataTypeMap().cpp_to_proto_.end()) {
return it->second; return it->second;
} }
...@@ -97,8 +98,8 @@ std::string DataTypeToString(const proto::VarType::Type type) { ...@@ -97,8 +98,8 @@ std::string DataTypeToString(const proto::VarType::Type type) {
} }
size_t SizeOfType(std::type_index type) { size_t SizeOfType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_size_.find(type); auto it = gDataTypeMap().cpp_to_size_.find(type.name());
if (it != gDataTypeMap().cpp_to_size_.end()) { if (LIKELY(it != gDataTypeMap().cpp_to_size_.end())) {
return it->second; return it->second;
} }
PADDLE_THROW("Not support %s as tensor type", type.name()); PADDLE_THROW("Not support %s as tensor type", type.name());
......
...@@ -31,9 +31,9 @@ limitations under the License. */ ...@@ -31,9 +31,9 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS #ifdef WITH_GPERFTOOLS
#include "google/gperftools.h" #include "gperftools/profiler.h"
#endif #endif
DEFINE_string(PEProfileFName, "", DEFINE_string(pe_profile_fname, "",
"Profiler filename for PE, which generated by gperftools." "Profiler filename for PE, which generated by gperftools."
"Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."); "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
...@@ -45,14 +45,14 @@ class ParallelExecutorPrivate { ...@@ -45,14 +45,14 @@ class ParallelExecutorPrivate {
public: public:
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places) explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
: places_(places) { : places_(places) {
if (!FLAGS_PEProfileFName.empty()) { if (!FLAGS_pe_profile_fname.empty()) {
std::call_once(gProfileOnce, [] { std::call_once(gProfileOnce, [] {
#ifdef WITH_GPERFTOOLS #ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_PEProfileFName.c_str()); ProfilerStart(FLAGS_pe_profile_fname.c_str());
gProfileStarted = true; gProfileStarted = true;
#else #else
LOG(WARNING) << "Paddle is not compiled with gperftools. " LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_PEProfileFName will be ignored"; "FLAGS_pe_profile_fname will be ignored";
#endif #endif
}); });
} }
......
...@@ -62,22 +62,34 @@ inline std::string demangle(std::string name) { return name; } ...@@ -62,22 +62,34 @@ inline std::string demangle(std::string name) { return name; }
#endif #endif
struct EnforceNotMet : public std::exception { struct EnforceNotMet : public std::exception {
std::exception_ptr exp_;
std::string err_str_; std::string err_str_;
EnforceNotMet(std::exception_ptr e, const char* f, int l) : exp_(e) { EnforceNotMet(std::exception_ptr e, const char* f, int l) {
static constexpr int TRACE_STACK_LIMIT = 100;
try { try {
std::rethrow_exception(exp_); std::rethrow_exception(e);
} catch (const std::exception& exp) { } catch (std::exception& e) {
Init(e.what(), f, l);
}
}
template <typename... ARGS>
EnforceNotMet(const char* f, int l, ARGS... args) {
Init(string::Sprintf(args...), f, l);
}
const char* what() const noexcept override { return err_str_.c_str(); }
private:
template <typename StrType>
inline void Init(StrType what, const char* f, int l) {
static constexpr int TRACE_STACK_LIMIT = 100;
std::ostringstream sout; std::ostringstream sout;
sout << string::Sprintf("%s at [%s:%d]", exp.what(), f, l) << std::endl; sout << string::Sprintf("%s at [%s:%d]", what, f, l) << std::endl;
sout << "PaddlePaddle Call Stacks: " << std::endl; sout << "PaddlePaddle Call Stacks: " << std::endl;
#if !defined(_WIN32) #if !defined(_WIN32)
void* call_stack[TRACE_STACK_LIMIT]; void* call_stack[TRACE_STACK_LIMIT];
auto size = backtrace(call_stack, TRACE_STACK_LIMIT); auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
auto symbols = backtrace_symbols(call_stack, size); auto symbols = backtrace_symbols(call_stack, size);
Dl_info info; Dl_info info;
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
if (dladdr(call_stack[i], &info) && info.dli_sname) { if (dladdr(call_stack[i], &info) && info.dli_sname) {
...@@ -85,8 +97,8 @@ struct EnforceNotMet : public std::exception { ...@@ -85,8 +97,8 @@ struct EnforceNotMet : public std::exception {
auto addr_offset = static_cast<char*>(call_stack[i]) - auto addr_offset = static_cast<char*>(call_stack[i]) -
static_cast<char*>(info.dli_saddr); static_cast<char*>(info.dli_saddr);
sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
2 + sizeof(void*) * 2, call_stack[i], 2 + sizeof(void*) * 2, call_stack[i], demangled,
demangled, addr_offset); addr_offset);
} else { } else {
sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2, sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
call_stack[i]); call_stack[i]);
...@@ -98,9 +110,6 @@ struct EnforceNotMet : public std::exception { ...@@ -98,9 +110,6 @@ struct EnforceNotMet : public std::exception {
#endif #endif
err_str_ = sout.str(); err_str_ = sout.str();
} }
}
const char* what() const noexcept { return err_str_.c_str(); }
}; };
struct EOFException : public std::exception { struct EOFException : public std::exception {
...@@ -243,12 +252,7 @@ inline void throw_on_error(T e) { ...@@ -243,12 +252,7 @@ inline void throw_on_error(T e) {
} }
#define PADDLE_THROW(...) \ #define PADDLE_THROW(...) \
do { \ throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \
std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \
} while (false)
#ifndef REPLACE_ENFORCE_GLOG #ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \ #define PADDLE_ENFORCE(...) \
......
...@@ -127,7 +127,8 @@ def __bootstrap__(): ...@@ -127,7 +127,8 @@ def __bootstrap__():
'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem',
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
'eager_delete_tensor_gb', 'allocator_strategy', 'eager_delete_tensor_gb', 'allocator_strategy',
'reader_queue_speed_test_mode', 'print_sub_graph_dir' 'reader_queue_speed_test_mode', 'print_sub_graph_dir',
'pe_profile_fname'
] ]
if 'Darwin' not in sysstr: if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory') read_env_flags.append('use_pinned_memory')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册