From b384af58bec85702b33dc0080102e24d464503cf Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 7 Sep 2016 22:06:27 +0800 Subject: [PATCH] Fix bugs in CustomStackTrace. * Make layer stack trace shows ThreadId, Forward or Backward. Change-Id: Iba1477adb8c9115c3a67ff2959bb5c878ca706c7 --- paddle/.gitignore | 1 + .../gradientmachines/NeuralNetwork.cpp | 1 + paddle/utils/CustomStackTrace.cpp | 35 ++++ paddle/utils/CustomStackTrace.h | 164 ++++++++++++++---- paddle/utils/Util.cpp | 8 +- 5 files changed, 166 insertions(+), 43 deletions(-) diff --git a/paddle/.gitignore b/paddle/.gitignore index b89bd9d9463..f921eef1415 100644 --- a/paddle/.gitignore +++ b/paddle/.gitignore @@ -40,3 +40,4 @@ HPPL_ERROR_LOG unittest.list proto dist +setup.py diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index fca52828957..eb1522a178d 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { + gLayerStackTrace.pop(""); // tell layer trace is during backward. FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); if ((*layer)->needGradient()) { diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp index 50d7f5402f5..232a478ecd9 100644 --- a/paddle/utils/CustomStackTrace.cpp +++ b/paddle/utils/CustomStackTrace.cpp @@ -14,9 +14,44 @@ limitations under the License. */ #include "CustomStackTrace.h" +#include "CommandLineParser.h" +#include + +P_DEFINE_bool(layer_stack_error_only_current_thread, + true, + "Dump current thread or whole process layer stack when signal error " + "occurred. true means only dump current thread layer stack"); namespace paddle { CustomStackTrace gLayerStackTrace; +static std::mutex gLayerStackTraceMtx; +void installLayerStackTracer() { + logging::installFailureWriter([](const char* data, int sz) { + std::lock_guard guard(gLayerStackTraceMtx); + if (!gLayerStackTrace.empty()) { + size_t curTid = -1UL; + std::hash hasher; + gLayerStackTrace.dump([&curTid, &hasher](std::thread::id tid, + bool* isForwarding, + const std::string& layerName) { + if (curTid != hasher(tid)) { + if (curTid != -1UL) { + std::cerr << std::endl; + } + curTid = hasher(tid); + std::cerr << "Thread [" << tid << "] "; + if (isForwarding) { + std::cerr << (*isForwarding ? "Forwarding ": "Backwarding "); + } + } + std::cerr << layerName << ", "; + }, FLAGS_layer_stack_error_only_current_thread); + std::cerr << std::endl; + } + std::cerr.write(data, sz); + }); +} + } // namespace paddle diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h index e1b2d2d8e5e..774c4db2b9b 100644 --- a/paddle/utils/CustomStackTrace.h +++ b/paddle/utils/CustomStackTrace.h @@ -15,6 +15,9 @@ limitations under the License. */ #pragma once #include +#include +#include +#include #include "ThreadLocal.h" @@ -29,25 +32,18 @@ namespace paddle { * @code{.cpp} * * paddle::CustomStackTrace stack; - * PASS_TEST=0; * for (auto& layer : layers){ * stack.push(layer->getName()); - * layer->forward(passType); + * layer->forward(); * } - * for (auto& layer : layers){ + * + * stack.pop(""); // mark under pop stage. + * + * for (auto it = layers.rbegin(); it != layers.rend(); ++it){ + * auto& layer = *it; * layer->backward(passType); * stack.pop(layer->getName()); * } - * - * if(passType == PASS_TEST) { - * stack.clear(); - * } - * else { - * stack.dump([](const std::string& layername){ - * LOG(INFO) << "LayerName: " << layername; - * }) - * } - * * * @endcode */ @@ -55,45 +51,141 @@ template class CustomStackTrace{ public: /** - * @brief Pop out an item from the top of the stack. For safety the item - * will be poped should equal to ip. + * @brief Pop out an item from the top of the stack if item == top. + * Else, just set status to popping. */ - void pop(const T& ip) { - auto& p = *logstack_; - CHECK_EQ(ip, p.top()); - p.pop(); + void pop(const T& item) { + pushing() = false; + auto& s = this->stack(); + if (item == s.top()) { + s.pop(); + } } + /** - * @brief Empty the stack by sequence from top to button. - * @param[in] callback A function deal with each item while dumping. - * It must have and only have a in parameter which is the stack item. + * @brief clear current thread stack. */ - template - void dump(Callback callback) { - auto& p = *logstack_; - while (!p.empty()) { - callback(p.top()); - p.pop(); + void clear() { + auto& s = stack(); + while (!s.empty()) { + s.pop(); } } + /** - * @brief Only empty the stack. + * @brief return true if all thread's stack is empty. + * @return true if empty */ - void clear() { - dump([](const T& ip){}); + bool empty() const { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::stack& s = *p.second; + if (!s.empty()) { + return false; + } + } + return true; + } + + + /** + * @brief DumpCallback Type. It will be invoked many times by dump method. + * + * The first parameter is stack thread id. + * The second parameter is the last action of stack is push or not. + * The third parameter is the item in stack. + */ + typedef std::function DumpCallback; + + /** + * Dump all thread stack, and all stack will be cleared. + */ + void dump(const DumpCallback& callback, bool onlyCurrentThread = false) { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::thread::id tid = p.first; + if (onlyCurrentThread && tid != std::this_thread::get_id()) { + continue; + } + std::stack& s = *p.second; + bool* isPush = nullptr; + auto it = this->pushingBuffers_.find(tid); + if (it != this->pushingBuffers_.end()) { + isPush = it->second; + } + + while (!s.empty()) { + callback(tid, isPush, s.top()); + s.pop(); + } + } } + /** - * @brief Push item ip to the top of the stack. + * @brief Push item to current thread stack. */ - void push(const T& ip) { - auto& p = *logstack_; - p.push(ip); + void push(const T& item) { + pushing() = true; + auto& p = this->stack(); + p.push(item); } private: - ThreadLocalD > logstack_; + /** + * Get thread local attribute, and save them into a map (threadId => TYPE*) + * + * @tparam TYPE thread local attribute type. + * @param threadLocal Thread Local object. + * @param buffers a map from threadId to TYPE* + */ + template + inline TYPE& getThreadLocal( + ThreadLocal& threadLocal, + std::unordered_map& buffers) { + TYPE* retv = threadLocal.get(false); + if (retv) { + return *retv; + } else { + std::lock_guard guard(this->mtx_); + retv = threadLocal.get(); + auto id = std::this_thread::get_id(); + buffers.insert({id, retv}); + return *retv; + } + } + + /** + * @brief Get thread local stack reference. + */ + std::stack& stack() { + return this->getThreadLocal(this->logStack_, + this->stackBuffers_); + } + + /** + * @brief Get thread local pushing flag. + */ + bool& pushing() { + return this->getThreadLocal(this->isPushing_, + this->pushingBuffers_); + } + +private: + mutable std::mutex mtx_; + + std::unordered_map* > stackBuffers_; + std::unordered_map pushingBuffers_; + ThreadLocal isPushing_; + ThreadLocal > logStack_; }; extern CustomStackTrace gLayerStackTrace; +/** + * @brief Install a failure handler to print layer stack when error. + */ +extern void installLayerStackTracer(); + } // namespace paddle diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 1c1d75dc5be..d8c3376fb18 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -129,13 +129,7 @@ void runInitFunctions() { void initMain(int argc, char** argv) { initializeLogging(argc, argv); - logging::installFailureWriter([](const char* data, int sz) { - std::cerr << "Current Layer forward/backward stack is " << std::endl; - gLayerStackTrace.dump([](const std::string& layername){ - std::cerr << "LayerName: " << layername << std::endl; - }); - std::cerr.write(data, sz); - }); + installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; -- GitLab