提交 b22d638d 编写于 作者: Y Yu Yang

Speed up SizeOfType

test=develop
上级 f0c0bf32
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/data_type.h"
#include <stdint.h>
#include <map>
#include <string>
#include <unordered_map>
......@@ -23,10 +24,10 @@ namespace paddle {
namespace framework {
// Lookup tables relating C++ types, proto::VarType::Type values, printable
// names and element sizes. Entries are added by RegisterType.
// NOTE(review): cpp_to_proto_ and cpp_to_size_ are each declared twice below
// (once keyed by std::type_index, once by const char*). This looks like the
// removed and added lines of a diff rendered together -- confirm which
// declaration of each pair is the live one.
struct DataTypeMap {
std::unordered_map<std::type_index, proto::VarType::Type> cpp_to_proto_;
// Keyed by const char* with the default ordering, so keys are compared as
// pointer values, not as string contents. RegisterType inserts
// typeid(T).name() here.
// NOTE(review): lookups therefore presumably rely on typeid(T).name()
// yielding the same pointer for the same type at every call site -- confirm
// this holds across shared-library boundaries.
std::map<const char*, proto::VarType::Type> cpp_to_proto_;
// Keyed by the proto type cast to int; maps to the registered C++ type.
std::unordered_map<int, std::type_index> proto_to_cpp_;
// Keyed by the proto type cast to int; maps to the name given at registration.
std::unordered_map<int, std::string> proto_to_str_;
std::unordered_map<std::type_index, size_t> cpp_to_size_;
// Same pointer-keyed scheme as cpp_to_proto_; the value is sizeof(T).
std::map<const char* /*name pointer*/, size_t> cpp_to_size_;
};
static DataTypeMap* InitDataTypeMap();
......@@ -43,9 +44,9 @@ static inline void RegisterType(DataTypeMap* map,
proto::VarType::Type proto_type,
const std::string& name) {
map->proto_to_cpp_.emplace(static_cast<int>(proto_type), typeid(T));
map->cpp_to_proto_.emplace(typeid(T), proto_type);
map->cpp_to_proto_.emplace(typeid(T).name(), proto_type);
map->proto_to_str_.emplace(static_cast<int>(proto_type), name);
map->cpp_to_size_.emplace(typeid(T), sizeof(T));
map->cpp_to_size_.emplace(typeid(T).name(), sizeof(T));
}
static DataTypeMap* InitDataTypeMap() {
......@@ -71,7 +72,7 @@ static DataTypeMap* InitDataTypeMap() {
}
proto::VarType::Type ToDataType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_proto_.find(type);
auto it = gDataTypeMap().cpp_to_proto_.find(type.name());
if (it != gDataTypeMap().cpp_to_proto_.end()) {
return it->second;
}
......@@ -97,8 +98,8 @@ std::string DataTypeToString(const proto::VarType::Type type) {
}
size_t SizeOfType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_size_.find(type);
if (it != gDataTypeMap().cpp_to_size_.end()) {
auto it = gDataTypeMap().cpp_to_size_.find(type.name());
if (LIKELY(it != gDataTypeMap().cpp_to_size_.end())) {
return it->second;
}
PADDLE_THROW("Not support %s as tensor type", type.name());
......
......@@ -31,9 +31,9 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS
#include "google/gperftools.h"
#include "gperftools/profiler.h"
#endif
// String flag, empty by default, whose value is handed to ProfilerStart() by
// ParallelExecutorPrivate when it is non-empty.
// NOTE(review): two DEFINE_string opener lines appear here (PEProfileFName and
// pe_profile_fname); this looks like the removed and added lines of a diff
// shown together -- confirm which name is live.
// NOTE(review): the two adjacent literals are concatenated with no separating
// space, so the help text reads "...gperftools.Only valid...".
// NOTE(review): the help text names the build option `WITH_PRIFILER`, while
// the code that uses this flag is guarded by WITH_GPERFTOOLS; presumably
// `WITH_PROFILER` is meant -- confirm against the build configuration.
DEFINE_string(PEProfileFName, "",
DEFINE_string(pe_profile_fname, "",
"Profiler filename for PE, which generated by gperftools."
"Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
......@@ -45,14 +45,14 @@ class ParallelExecutorPrivate {
public:
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
: places_(places) {
if (!FLAGS_PEProfileFName.empty()) {
if (!FLAGS_pe_profile_fname.empty()) {
std::call_once(gProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_PEProfileFName.c_str());
ProfilerStart(FLAGS_pe_profile_fname.c_str());
gProfileStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_PEProfileFName will be ignored";
"FLAGS_pe_profile_fname will be ignored";
#endif
});
}
......
......@@ -62,45 +62,54 @@ inline std::string demangle(std::string name) { return name; }
#endif
// Exception type whose what() text has the form "<message> at [<file>:<line>]"
// followed by a "PaddlePaddle Call Stacks: " section.
//
// On non-Windows builds the section lists up to TRACE_STACK_LIMIT (100) frames
// obtained from backtrace(); frames that dladdr() can resolve to a symbol are
// printed with the demangled name and the offset from the symbol address, the
// rest with the raw address only. On Windows a fixed notice is appended
// instead.
//
// NOTE(review): this listing interleaves two versions of the struct (it looks
// like the removed and added lines of a diff with the markers stripped), so
// several statements appear twice and the braces do not balance as shown.
// Confirm against the real header before relying on the exact layout.
struct EnforceNotMet : public std::exception {
std::exception_ptr exp_;
// Fully formatted message returned by what().
std::string err_str_;
EnforceNotMet(std::exception_ptr e, const char* f, int l) : exp_(e) {
static constexpr int TRACE_STACK_LIMIT = 100;
// Builds the message from an existing exception: the pointer is rethrown and
// the caught object's what() text is used. Only std::exception is caught
// here, so any other rethrown type leaves the constructor.
EnforceNotMet(std::exception_ptr e, const char* f, int l) {
try {
std::rethrow_exception(exp_);
} catch (const std::exception& exp) {
std::ostringstream sout;
std::rethrow_exception(e);
// NOTE(review): this handler's parameter is also named `e`, hiding the
// constructor parameter of the same name inside the handler.
} catch (std::exception& e) {
Init(e.what(), f, l);
}
}
sout << string::Sprintf("%s at [%s:%d]", exp.what(), f, l) << std::endl;
sout << "PaddlePaddle Call Stacks: " << std::endl;
// Builds the message directly from format arguments: args are passed to
// string::Sprintf and the result goes to Init. f and l are the file name and
// line number placed in the "at [file:line]" suffix.
template <typename... ARGS>
EnforceNotMet(const char* f, int l, ARGS... args) {
Init(string::Sprintf(args...), f, l);
}
const char* what() const noexcept override { return err_str_.c_str(); }
private:
// Shared formatting routine: writes "<what> at [<f>:<l>]", the call-stack
// header and the stack frames into a stream, then stores the result in
// err_str_.
template <typename StrType>
inline void Init(StrType what, const char* f, int l) {
static constexpr int TRACE_STACK_LIMIT = 100;
std::ostringstream sout;
sout << string::Sprintf("%s at [%s:%d]", what, f, l) << std::endl;
sout << "PaddlePaddle Call Stacks: " << std::endl;
#if !defined(_WIN32)
void* call_stack[TRACE_STACK_LIMIT];
auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
auto symbols = backtrace_symbols(call_stack, size);
Dl_info info;
for (int i = 0; i < size; ++i) {
if (dladdr(call_stack[i], &info) && info.dli_sname) {
auto demangled = demangle(info.dli_sname);
auto addr_offset = static_cast<char*>(call_stack[i]) -
static_cast<char*>(info.dli_saddr);
sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
2 + sizeof(void*) * 2, call_stack[i],
demangled, addr_offset);
} else {
sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
call_stack[i]);
}
void* call_stack[TRACE_STACK_LIMIT];
auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
// NOTE(review): `symbols` is only assigned and later freed in the code
// visible here; its contents are never read.
auto symbols = backtrace_symbols(call_stack, size);
Dl_info info;
for (int i = 0; i < size; ++i) {
// Frames with a resolvable symbol name get "<index> <address> <name> +
// <offset>"; the others get "<index> <address>" only.
if (dladdr(call_stack[i], &info) && info.dli_sname) {
auto demangled = demangle(info.dli_sname);
// Byte distance from the start of the resolved symbol to the frame address.
auto addr_offset = static_cast<char*>(call_stack[i]) -
static_cast<char*>(info.dli_saddr);
sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
2 + sizeof(void*) * 2, call_stack[i], demangled,
addr_offset);
} else {
sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
call_stack[i]);
}
free(symbols);
}
free(symbols);
#else
sout << "Windows not support stack backtrace yet.";
sout << "Windows not support stack backtrace yet.";
#endif
err_str_ = sout.str();
}
err_str_ = sout.str();
}
const char* what() const noexcept { return err_str_.c_str(); }
};
struct EOFException : public std::exception {
......@@ -242,13 +251,8 @@ inline void throw_on_error(T e) {
throw_on_error(e, "");
}
// PADDLE_THROW(fmt, ...): throws ::paddle::platform::EnforceNotMet carrying
// the formatted message together with __FILE__ and __LINE__ of the call site.
//
// Two definitions are shown. The first formats the arguments with
// paddle::string::Sprintf, wraps the text in a std::runtime_error held by an
// exception_ptr, and is a do { ... } while (false) statement. The second
// passes __FILE__, __LINE__ and the raw arguments straight to the
// EnforceNotMet constructor and expands to a bare throw expression.
// NOTE(review): having both looks like the removed and added lines of a diff
// rendered together -- confirm which definition is live.
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \
std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \
} while (false)
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
#ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \
......
......@@ -127,7 +127,8 @@ def __bootstrap__():
'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem',
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
'eager_delete_tensor_gb', 'allocator_strategy',
'reader_queue_speed_test_mode', 'print_sub_graph_dir'
'reader_queue_speed_test_mode', 'print_sub_graph_dir',
'pe_profile_fname'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册