diff --git a/.gitignore b/.gitignore index 00368ede67d3d2426f50a278578a33d18b736ca0..801c76325c92e8c1b381f7398416ab703e829ae6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.DS_Store build/ +*.user diff --git a/CMakeLists.txt b/CMakeLists.txt index 007f1f18bb655e10c45bc7e21a299861eb204dd2..99c6c0d373052fa1be528ebb82c3d2f248e64bb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ find_package(CUDA QUIET) find_package(Protobuf REQUIRED) find_package(PythonLibs 2.7 REQUIRED) find_package(PythonInterp 2.7 REQUIRED) +find_package(ZLIB REQUIRED) find_package(NumPy) find_package(Threads REQUIRED) find_package(Glog) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index c95d4063105e74df7acd491558f399dff6a8e0a8..4b99e7f7fb6af3ca85d6dc642a4aea07e64c7049 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -58,8 +58,8 @@ set(COMMON_FLAGS -fPIC -fno-omit-frame-pointer -Wall -# -Wextra -# -Werror + -Wextra + -Werror -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter diff --git a/cmake/util.cmake b/cmake/util.cmake index f3227d27c53c24e0e2e6a68eccecb188b1f1c14d..5f2f4a075cc579fac827fefbfc30f6743d2e4cc9 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -1,19 +1,3 @@ -# MAC OS does not contain start-up and whole-archive args -if(APPLE) - set(GROUP_START "") - set(GROUP_END "") - - set(ARCHIVE_START "") - set(ARCHIVE_END "") -else() - set(GROUP_START "-Wl,--start-group") - set(GROUP_END "-Wl,--end-group") - - set(ARCHIVE_START "-Wl,--whole-archive") - set(ARCHIVE_END "-Wl,--no-whole-archive") -endif() - - # Some common routine for paddle compile. # target_circle_link_libraries @@ -23,17 +7,46 @@ endif() # Rest Arguments: libraries which link together. function(target_circle_link_libraries TARGET_NAME) if(APPLE) + set(LIBS) + set(inArchive OFF) + set(libsInArgn) + foreach(arg ${ARGN}) - list(APPEND OSX_LIBRARIES "-Wl,-force_load" "${arg}") + if(${arg} STREQUAL "ARCHIVE_START") + set(inArchive ON) + elseif(${arg} STREQUAL "ARCHIVE_END") + set(inArchive OFF) + else() + if(inArchive) + list(APPEND LIBS "-Wl,-force_load") + endif() + list(APPEND LIBS ${arg}) + list(APPEND libsInArgn ${arg}) + endif() endforeach() + + list(REVERSE libsInArgn) target_link_libraries(${TARGET_NAME} - ${OSX_LIBRARIES} -lz) - else() + ${LIBS} + ${libsInArgn}) + + else() # LINUX + set(LIBS) + + foreach(arg ${ARGN}) + if(${arg} STREQUAL "ARCHIVE_START") + list(APPEND LIBS "-Wl,--whole-archive") + elseif(${arg} STREQUAL "ARCHIVE_END") + list(APPEND LIBS "-Wl,--no-whole-archive") + else() + list(APPEND LIBS ${arg}) + endif() + endforeach() + target_link_libraries(${TARGET_NAME} - ${GROUP_START} - ${ARGN} - -lz - ${GROUP_END}) + "-Wl,--start-group" + ${LIBS} + "-Wl,--end-group") endif() endfunction() @@ -65,20 +78,20 @@ function(link_paddle_exe TARGET_NAME) if(PADDLE_WITH_INTERNAL) set(INTERAL_LIBS paddle_internal_gserver paddle_internal_parameter) target_circle_link_libraries(${TARGET_NAME} - ${ARCHIVE_START} + ARCHIVE_START paddle_internal_gserver paddle_internal_owlqn - ${ARCHIVE_END} + ARCHIVE_END paddle_internal_parameter) else() set(INTERAL_LIBS "") endif() target_circle_link_libraries(${TARGET_NAME} - ${ARCHIVE_START} + ARCHIVE_START paddle_gserver ${METRIC_LIBS} - ${ARCHIVE_END} + ARCHIVE_END paddle_pserver paddle_trainer_lib paddle_network @@ -91,8 +104,10 @@ function(link_paddle_exe TARGET_NAME) ${PROTOBUF_LIBRARY} ${CMAKE_THREAD_LIBS_INIT} ${CBLAS_LIBS} + ${INTERAL_LIBS} + ${ZLIB_LIBRARIES} ${CMAKE_DL_LIBS} - ${INTERAL_LIBS}) + ) if(WITH_PYTHON) 
target_link_libraries(${TARGET_NAME} diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 903922204343ec49a1696eb35b06c127310b432d..0f497e44d4c25c2f8b79a1ee3f0630456d587820 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { + gLayerStackTrace.pop(""); // tell layer trace is during backward. FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); if ((*layer)->needGradient()) { diff --git a/paddle/math/Allocator.h b/paddle/math/Allocator.h index ca8eadbc1aa42265c1e505d41e1a134b0e7f19b9..f7aa60380f23eeea91ee852480862f6b19caedec 100644 --- a/paddle/math/Allocator.h +++ b/paddle/math/Allocator.h @@ -49,7 +49,7 @@ public: */ virtual void* alloc(size_t size) { void* ptr; - posix_memalign(&ptr, 32ul, size); + CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0); CHECK(ptr) << "Fail to allocate CPU memory: size=" << size; return ptr; } diff --git a/paddle/math/tests/test_SIMDFunctions.cpp b/paddle/math/tests/test_SIMDFunctions.cpp index bae5d8c684d8920b738fcd1cda1bd831f2333605..491b0cda7b9e1a13882aee6621e0de984709ae80 100644 --- a/paddle/math/tests/test_SIMDFunctions.cpp +++ b/paddle/math/tests/test_SIMDFunctions.cpp @@ -38,7 +38,7 @@ static std::mt19937 RandomEngine(time(0)); inline static std::unique_ptr NewVector(size_t len = VECTOR_LEN, size_t align = ALIGN) { float* ptr; - posix_memalign((void**)&ptr, align, len * sizeof(float)); + CHECK_EQ(posix_memalign((void**)&ptr, align, len * sizeof(float)), 0); return std::unique_ptr(ptr); } diff --git a/paddle/math/tests/test_perturbation.cpp b/paddle/math/tests/test_perturbation.cpp index 050f2ca9ced80d18bef2a83f087ee7aa88fa736d..51e346fef91bf8dae7a5084ba8b2d86d9021650b 100644 --- a/paddle/math/tests/test_perturbation.cpp +++ b/paddle/math/tests/test_perturbation.cpp @@ -254,10 +254,4 @@ int main(int argc, char** argv) { return RUN_ALL_TESTS(); } -#else - -int main(int argc, char const* argv[]) { - return 0; -} - #endif diff --git a/paddle/parameter/tests/test_common.cpp b/paddle/parameter/tests/test_common.cpp index 4f92aec1d967169e2c27579b8a95941c029a3e49..1a22abf7cf80157039f6147293e7648d654e45f7 100644 --- a/paddle/parameter/tests/test_common.cpp +++ b/paddle/parameter/tests/test_common.cpp @@ -125,9 +125,11 @@ TEST_F(CommonTest, sgdUpdate) { const size_t alignHeader[] = {0, 2, 3, 5, 7, 8}; for (auto& size : sizeVec_) { real *gradientBuffer, *valueBuffer, *momentumBuffer; - posix_memalign((void**)&gradientBuffer, 32, sizeof(real) * size); - posix_memalign((void**)&valueBuffer, 32, sizeof(real) * size); - posix_memalign((void**)&momentumBuffer, 32, sizeof(real) * size); + CHECK_EQ(posix_memalign((void**)&gradientBuffer, 32, sizeof(real) * size), + 0); + CHECK_EQ(posix_memalign((void**)&valueBuffer, 32, sizeof(real) * size), 0); + CHECK_EQ(posix_memalign((void**)&momentumBuffer, 32, sizeof(real) * size), + 0); for (size_t i = 0; i < size; i++) { gradientBuffer[i] = 1.0; diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index 3c08f1e3055f86aadca8844094381909e86df0c1..0557b01e36f078bebebbcb65af95357c96369514 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -2,12 +2,18 @@ file(GLOB UTIL_HEADERS . *.h) file(GLOB UTIL_SOURCES . *.cpp) - +if(APPLE) + file(GLOB UTIL_ARCH_SOURCES . 
arch/osx/*.cpp)
+else()
+  file(GLOB UTIL_ARCH_SOURCES . arch/linux/*.cpp)
+endif()
 add_library(paddle_utils STATIC
-        ${UTIL_SOURCES})
+        ${UTIL_SOURCES}
+        ${UTIL_ARCH_SOURCES})
 add_style_check_target(paddle_utils ${UTIL_HEADERS})
-add_style_check_target(paddle_utils ${UTIL_SOURCES})
+add_style_check_target(paddle_utils ${UTIL_SOURCES}
+        ${UTIL_ARCH_SOURCES})
 add_dependencies(paddle_utils gen_proto_cpp)
 if(WITH_TESTING)
   add_subdirectory(tests)
-endif()
\ No newline at end of file
+endif()
diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp
index 50d7f5402f586771194fa5b1578293b7614ea1f2..232a478ecd93a7dcb7da7b02a5a1af37a1d1bc43 100644
--- a/paddle/utils/CustomStackTrace.cpp
+++ b/paddle/utils/CustomStackTrace.cpp
@@ -14,9 +14,44 @@ limitations under the License. */
 
 #include "CustomStackTrace.h"
+#include "CommandLineParser.h"
+#include <iostream>
+
+P_DEFINE_bool(layer_stack_error_only_current_thread,
+              true,
+              "Dump current thread or whole process layer stack when signal error "
+              "occurred. true means only dump current thread layer stack");
 
 namespace paddle {
 
 CustomStackTrace<std::string> gLayerStackTrace;
+static std::mutex gLayerStackTraceMtx;
+void installLayerStackTracer() {
+  logging::installFailureWriter([](const char* data, int sz) {
+    std::lock_guard<std::mutex> guard(gLayerStackTraceMtx);
+    if (!gLayerStackTrace.empty()) {
+      size_t curTid = -1UL;
+      std::hash<std::thread::id> hasher;
+      gLayerStackTrace.dump([&curTid, &hasher](std::thread::id tid,
+                                               bool* isForwarding,
+                                               const std::string& layerName) {
+        if (curTid != hasher(tid)) {
+          if (curTid != -1UL) {
+            std::cerr << std::endl;
+          }
+          curTid = hasher(tid);
+          std::cerr << "Thread [" << tid << "] ";
+          if (isForwarding) {
+            std::cerr << (*isForwarding ? "Forwarding " : "Backwarding ");
+          }
+        }
+        std::cerr << layerName << ", ";
+      }, FLAGS_layer_stack_error_only_current_thread);
+      std::cerr << std::endl;
+    }
+    std::cerr.write(data, sz);
+  });
+}
+
 }  // namespace paddle
diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h
index e1b2d2d8e5ee6ce572b10b94a42fb285078dddc1..774c4db2b9be40c38286ef1248bf77746949fd6b 100644
--- a/paddle/utils/CustomStackTrace.h
+++ b/paddle/utils/CustomStackTrace.h
@@ -15,6 +15,9 @@ limitations under the License. */
 #pragma once
 
 #include <stack>
+#include <thread>
+#include <unordered_map>
+#include <functional>
 
 #include "ThreadLocal.h"
 
@@ -29,25 +32,18 @@ namespace paddle {
 * @code{.cpp}
 *
 * paddle::CustomStackTrace<std::string> stack;
- * PASS_TEST=0;
 * for (auto& layer : layers){
 *   stack.push(layer->getName());
- *   layer->forward(passType);
+ *   layer->forward();
 * }
- * for (auto& layer : layers){
+ *
+ * stack.pop("");  // mark under pop stage.
+ *
+ * for (auto it = layers.rbegin(); it != layers.rend(); ++it){
+ *   auto& layer = *it;
 *   layer->backward(passType);
 *   stack.pop(layer->getName());
 * }
- *
- * if(passType == PASS_TEST) {
- *   stack.clear();
- * }
- * else {
- *   stack.dump([](const std::string& layername){
- *     LOG(INFO) << "LayerName: " << layername;
- *   })
- * }
- *
 *
 * @endcode
 */
@@ -55,45 +51,141 @@ template <typename T> class CustomStackTrace{
 public:
   /**
-   * @brief Pop out an item from the top of the stack. For safety the item
-   *        will be poped should equal to ip.
+   * @brief Pop out an item from the top of the stack if item == top.
+   *        Else, just set status to popping.
    */
-  void pop(const T& ip) {
-    auto& p = *logstack_;
-    CHECK_EQ(ip, p.top());
-    p.pop();
+  void pop(const T& item) {
+    pushing() = false;
+    auto& s = this->stack();
+    if (item == s.top()) {
+      s.pop();
+    }
   }
+
   /**
-   * @brief Empty the stack by sequence from top to button.
-   * @param[in] callback A function deal with each item while dumping.
-   *            It must have and only have a in parameter which is the stack item.
+   * @brief clear current thread stack.
    */
-  template <typename Callback>
-  void dump(Callback callback) {
-    auto& p = *logstack_;
-    while (!p.empty()) {
-      callback(p.top());
-      p.pop();
+  void clear() {
+    auto& s = stack();
+    while (!s.empty()) {
+      s.pop();
     }
   }
+
   /**
-   * @brief Only empty the stack.
+   * @brief return true if all thread's stack is empty.
+   * @return true if empty
    */
-  void clear() {
-    dump([](const T& ip){});
+  bool empty() const {
+    std::lock_guard<std::mutex> g(this->mtx_);
+    for (auto p : this->stackBuffers_) {
+      std::stack<T>& s = *p.second;
+      if (!s.empty()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+
+  /**
+   * @brief DumpCallback Type. It will be invoked many times by dump method.
+   *
+   * The first parameter is stack thread id.
+   * The second parameter is the last action of stack is push or not.
+   * The third parameter is the item in stack.
+   */
+  typedef std::function<void(std::thread::id, bool*, const T&)> DumpCallback;
+
+  /**
+   * Dump all thread stack, and all stack will be cleared.
+   */
+  void dump(const DumpCallback& callback, bool onlyCurrentThread = false) {
+    std::lock_guard<std::mutex> g(this->mtx_);
+    for (auto p : this->stackBuffers_) {
+      std::thread::id tid = p.first;
+      if (onlyCurrentThread && tid != std::this_thread::get_id()) {
+        continue;
+      }
+      std::stack<T>& s = *p.second;
+      bool* isPush = nullptr;
+      auto it = this->pushingBuffers_.find(tid);
+      if (it != this->pushingBuffers_.end()) {
+        isPush = it->second;
+      }
+
+      while (!s.empty()) {
+        callback(tid, isPush, s.top());
+        s.pop();
+      }
+    }
   }
+
   /**
-   * @brief Push item ip to the top of the stack.
+   * @brief Push item to current thread stack.
    */
-  void push(const T& ip) {
-    auto& p = *logstack_;
-    p.push(ip);
+  void push(const T& item) {
+    pushing() = true;
+    auto& p = this->stack();
+    p.push(item);
   }
 
 private:
-  ThreadLocalD<std::stack<T> > logstack_;
+  /**
+   * Get thread local attribute, and save them into a map (threadId => TYPE*)
+   *
+   * @tparam TYPE thread local attribute type.
+   * @param threadLocal Thread Local object.
+   * @param buffers a map from threadId to TYPE*
+   */
+  template <typename TYPE>
+  inline TYPE& getThreadLocal(
+      ThreadLocal<TYPE>& threadLocal,
+      std::unordered_map<std::thread::id, TYPE*>& buffers) {
+    TYPE* retv = threadLocal.get(false);
+    if (retv) {
+      return *retv;
+    } else {
+      std::lock_guard<std::mutex> guard(this->mtx_);
+      retv = threadLocal.get();
+      auto id = std::this_thread::get_id();
+      buffers.insert({id, retv});
+      return *retv;
+    }
+  }
+
+  /**
+   * @brief Get thread local stack reference.
+   */
+  std::stack<T>& stack() {
+    return this->getThreadLocal(this->logStack_,
+                                this->stackBuffers_);
+  }
+
+  /**
+   * @brief Get thread local pushing flag.
+   */
+  bool& pushing() {
+    return this->getThreadLocal(this->isPushing_,
+                                this->pushingBuffers_);
+  }
+
+private:
+  mutable std::mutex mtx_;
+
+  std::unordered_map<std::thread::id, std::stack<T>* > stackBuffers_;
+  std::unordered_map<std::thread::id, bool*> pushingBuffers_;
+  ThreadLocal<bool> isPushing_;
+  ThreadLocal<std::stack<T> > logStack_;
 };
 
 extern CustomStackTrace<std::string> gLayerStackTrace;
+/**
+ * @brief Install a failure handler to print layer stack when error.
+ */
+extern void installLayerStackTracer();
+
 }  // namespace paddle
diff --git a/paddle/utils/Locks.cpp b/paddle/utils/Locks.cpp
deleted file mode 100644
index c2f58cf5764ef9d36a40f8fffa11075ed5ced75b..0000000000000000000000000000000000000000
--- a/paddle/utils/Locks.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef __APPLE__ -#include -#endif - -#ifdef __APPLE__ -#ifndef PTHREAD_BARRIER_H_ -#define PTHREAD_BARRIER_H_ - -#include -#include - -typedef int pthread_barrierattr_t; -typedef struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - int count; - int tripCount; -} pthread_barrier_t; - -int pthread_barrier_init(pthread_barrier_t *barrier, - const pthread_barrierattr_t *attr, unsigned int count) { - if (count == 0) { - errno = EINVAL; - return -1; - } - if (pthread_mutex_init(&barrier->mutex, 0) < 0) { - return -1; - } - if (pthread_cond_init(&barrier->cond, 0) < 0) { - pthread_mutex_destroy(&barrier->mutex); - return -1; - } - barrier->tripCount = count; - barrier->count = 0; - - return 0; -} - -int pthread_barrier_destroy(pthread_barrier_t *barrier) { - pthread_cond_destroy(&barrier->cond); - pthread_mutex_destroy(&barrier->mutex); - return 0; -} - -int pthread_barrier_wait(pthread_barrier_t *barrier) { - pthread_mutex_lock(&barrier->mutex); - ++(barrier->count); - if (barrier->count >= barrier->tripCount) { - barrier->count = 0; - pthread_cond_broadcast(&barrier->cond); - pthread_mutex_unlock(&barrier->mutex); - return 1; - } else { - pthread_cond_wait(&barrier->cond, &(barrier->mutex)); - pthread_mutex_unlock(&barrier->mutex); - return 0; - } -} - -#endif // PTHREAD_BARRIER_H_ - -typedef int pthread_spinlock_t; - -int pthread_spin_init(pthread_spinlock_t *lock, int pshared) { - __asm__ __volatile__("" ::: "memory"); - *lock = 0; - return 0; -} - -int pthread_spin_destroy(pthread_spinlock_t *lock) { - return 0; -} - -int pthread_spin_lock(pthread_spinlock_t *lock) { - while (1) { - int i; - for (i=0; i < 10000; i++) { - if (__sync_bool_compare_and_swap(lock, 0, 1)) { - return 0; - } - } - sched_yield(); - } -} - -int pthread_spin_unlock(pthread_spinlock_t *lock) { - __asm__ __volatile__("" ::: "memory"); - *lock = 0; - return 0; -} - -#endif // __APPLE__ diff --git a/paddle/utils/Locks.h b/paddle/utils/Locks.h index e7b0b77081f3695e4dad33068c2e3fdf65f631eb..1fc0363d34597c9447996479aaf771e46d0ba600 100644 --- a/paddle/utils/Locks.h +++ b/paddle/utils/Locks.h @@ -16,56 +16,11 @@ limitations under the License. 
*/ #pragma once #include -#include #include -#include - #include #include -#ifdef __APPLE__ -#include -#endif - -#ifdef __APPLE__ -#ifndef PTHREAD_BARRIER_H_ -#define PTHREAD_BARRIER_H_ - -#include -#include - -typedef int pthread_barrierattr_t; -typedef struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - int count; - int tripCount; -} pthread_barrier_t; - - -extern int pthread_barrier_init(pthread_barrier_t *barrier, - const pthread_barrierattr_t *attr, - unsigned int count); - -extern int pthread_barrier_destroy(pthread_barrier_t *barrier); - -extern int pthread_barrier_wait(pthread_barrier_t *barrier); - -#endif // PTHREAD_BARRIER_H_ - -typedef int pthread_spinlock_t; - -extern int pthread_spin_init(pthread_spinlock_t *lock, int pshared); - -extern int pthread_spin_destroy(pthread_spinlock_t *lock); - -extern int pthread_spin_lock(pthread_spinlock_t *lock); - -extern int pthread_spin_unlock(pthread_spinlock_t *lock); - -#endif - - +#include "DisableCopy.h" namespace paddle { @@ -142,58 +97,44 @@ protected: * which means it will keep trying to lock until lock on successfully. * The SpinLock disable copy. */ +class SpinLockPrivate; class SpinLock { public: - SpinLock() { pthread_spin_init(&lock_, 0); } - ~SpinLock() { pthread_spin_destroy(&lock_); } - SpinLock(const SpinLock&) = delete; - SpinLock& operator=(const SpinLock&) = delete; + DISABLE_COPY(SpinLock); + SpinLock(); + ~SpinLock(); // std::mutext interface - void lock() { pthread_spin_lock(&lock_); } - void unlock() { pthread_spin_unlock(&lock_); } + void lock(); + void unlock(); -protected: - pthread_spinlock_t lock_; - char padding_[64 - sizeof(pthread_spinlock_t)]; +private: + SpinLockPrivate* m; }; /** * A simple wapper of semaphore which can only be shared in the same process. */ - -#ifdef __APPLE__ - +class SemaphorePrivate; class Semaphore { public: - explicit Semaphore(int initValue = 0) { - sem_ = dispatch_semaphore_create(initValue); - } - - ~Semaphore() { dispatch_release(sem_); } - bool timeWait(struct timespec* ts) { - dispatch_time_t m = dispatch_walltime(ts, 0); - return (0 == dispatch_semaphore_wait(sem_, m)); - } - void wait() { dispatch_semaphore_wait(sem_, DISPATCH_TIME_FOREVER); } - void post() { dispatch_semaphore_signal(sem_);} - -protected: - dispatch_semaphore_t sem_; -}; + //! Disable copy & assign + Semaphore(const Semaphore& other) = delete; + Semaphore& operator= (const Semaphore&& other) = delete; -#else + //! Enable move. + Semaphore(Semaphore&& other): m(std::move(other.m)) { + } -class Semaphore { public: /** * @brief Construct Function. * @param[in] initValue the initial value of the * semaphore, default 0. */ - explicit Semaphore(int initValue = 0) { sem_init(&sem_, 0, initValue); } + explicit Semaphore(int initValue = 0); - ~Semaphore() { sem_destroy(&sem_); } + ~Semaphore(); /** * @brief The same as wait(), except if the decrement can not @@ -203,43 +144,38 @@ public: * @return ture if the decrement proceeds before ts, * else return false. */ - bool timeWait(struct timespec* ts) { return (0 == sem_timedwait(&sem_, ts)); } + bool timeWait(struct timespec* ts); /** * @brief decrement the semaphore. If the semaphore's value is 0, then call blocks. */ - void wait() { sem_wait(&sem_); } + void wait(); /** * @brief increment the semaphore. If the semaphore's value * greater than 0, wake up a thread blocked in wait(). 
*/ - void post() { sem_post(&sem_); } + void post(); -protected: - sem_t sem_; +private: + SemaphorePrivate* m; }; -#endif - -static_assert(sizeof(SpinLock) == 64, "Wrong padding"); - /** * A simple wrapper of thread barrier. * The ThreadBarrier disable copy. */ +class ThreadBarrierPrivate; class ThreadBarrier { public: + DISABLE_COPY(ThreadBarrier); + /** * @brief Construct Function. Initialize the barrier should * wait for count threads in wait(). */ - explicit ThreadBarrier(int count) { - pthread_barrier_init(&barrier_, NULL, count); - } - ~ThreadBarrier() { pthread_barrier_destroy(&barrier_); } - ThreadBarrier(const ThreadBarrier&) = delete; - ThreadBarrier& operator=(const ThreadBarrier&) = delete; + explicit ThreadBarrier(int count); + ~ThreadBarrier(); /** * @brief . @@ -247,10 +183,10 @@ public: * then wake up all the count - 1 threads and continue run together. * Else block the thread until waked by other thread . */ - void wait() { pthread_barrier_wait(&barrier_); } + void wait(); -protected: - pthread_barrier_t barrier_; +private: + ThreadBarrierPrivate* m; }; /** diff --git a/paddle/utils/Stat.cpp b/paddle/utils/Stat.cpp index aab5446a98c0bae3f03b4b319836721bdd1112da..ff6e8ade2cd48f85c3f1a0c2acd5542097aa4117 100644 --- a/paddle/utils/Stat.cpp +++ b/paddle/utils/Stat.cpp @@ -25,7 +25,7 @@ namespace paddle { pid_t getTID() { #if defined(__APPLE__) || defined(__OSX__) pid_t tid = syscall(SYS_thread_selfid); - #elif defined(__LINUX__) + #else #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 1c1d75dc5bed98848fcb03366b383201ee6f5024..d8c3376fb18c48185abdcb7a6d65fa56f0eaa290 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -129,13 +129,7 @@ void runInitFunctions() { void initMain(int argc, char** argv) { initializeLogging(argc, argv); - logging::installFailureWriter([](const char* data, int sz) { - std::cerr << "Current Layer forward/backward stack is " << std::endl; - gLayerStackTrace.dump([](const std::string& layername){ - std::cerr << "LayerName: " << layername << std::endl; - }); - std::cerr.write(data, sz); - }); + installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; diff --git a/paddle/utils/arch/linux/Locks.cpp b/paddle/utils/arch/linux/Locks.cpp new file mode 100644 index 0000000000000000000000000000000000000000..347ae64c26dfdfcdaff62886481c20e9c4c7cfec --- /dev/null +++ b/paddle/utils/arch/linux/Locks.cpp @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/utils/Locks.h" +#include +#include + +namespace paddle { +class SemaphorePrivate { +public: + sem_t sem; +}; + +Semaphore::Semaphore(int initValue): m(new SemaphorePrivate()) { + sem_init(&m->sem, 0, initValue); +} + +Semaphore::~Semaphore() { + sem_destroy(&m->sem); +} + +bool Semaphore::timeWait(struct timespec* ts) { + return (0 == sem_timedwait(&m->sem, ts)); +} + +void Semaphore::wait() { + sem_wait(&m->sem); +} + +void Semaphore::post() { + sem_post(&m->sem); +} + + +class SpinLockPrivate { +public: + inline SpinLockPrivate() { pthread_spin_init(&lock_, 0); } + inline ~SpinLockPrivate() { pthread_spin_destroy(&lock_); } + pthread_spinlock_t lock_; + char padding_[64 - sizeof(pthread_spinlock_t)]; +}; + +SpinLock::SpinLock():m(new SpinLockPrivate()) {} + + +SpinLock::~SpinLock() { delete m; } + +void SpinLock::lock() { + pthread_spin_lock(&m->lock_); +} + +void SpinLock::unlock() { + pthread_spin_unlock(&m->lock_); +} + +class ThreadBarrierPrivate { +public: + pthread_barrier_t barrier_; +}; + +ThreadBarrier::ThreadBarrier(int count): m(new ThreadBarrierPrivate()) { + pthread_barrier_init(&m->barrier_, nullptr, count); +} + +ThreadBarrier::~ThreadBarrier() { + pthread_barrier_destroy(&m->barrier_); + delete m; +} + +void ThreadBarrier::wait() { + pthread_barrier_wait(&m->barrier_); +} + +} // namespace paddle diff --git a/paddle/utils/arch/osx/Locks.cpp b/paddle/utils/arch/osx/Locks.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5e0411624fd60cd441d251c14084cd0a1ca42bb5 --- /dev/null +++ b/paddle/utils/arch/osx/Locks.cpp @@ -0,0 +1,112 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/utils/Locks.h"
+#include "paddle/utils/Logging.h"
+#include <dispatch/dispatch.h>
+#include <libkern/OSAtomic.h>
+namespace paddle {
+class SemaphorePrivate {
+public:
+  ~SemaphorePrivate() {
+    dispatch_release(sem);
+  }
+
+  dispatch_semaphore_t sem;
+};
+
+Semaphore::Semaphore(int initValue): m(new SemaphorePrivate()) {
+  m->sem = dispatch_semaphore_create(initValue);
+}
+
+Semaphore::~Semaphore() {
+  delete m;
+}
+
+bool Semaphore::timeWait(timespec *ts) {
+  dispatch_time_t tm = dispatch_walltime(ts, 0);
+  return (0 == dispatch_semaphore_wait(m->sem, tm));
+}
+
+void Semaphore::wait() {
+  dispatch_semaphore_wait(m->sem, DISPATCH_TIME_FOREVER);
+}
+
+void Semaphore::post() {
+  dispatch_semaphore_signal(m->sem);
+}
+
+class SpinLockPrivate {
+public:
+  SpinLockPrivate(): lock_(0) {}
+
+  OSSpinLock lock_;
+  char padding_[64 - sizeof(OSSpinLock)];  // Padding to cache line size
+};
+
+SpinLock::SpinLock(): m(new SpinLockPrivate()) {}
+SpinLock::~SpinLock() { delete m; }
+
+void SpinLock::lock() {
+  OSSpinLockLock(&m->lock_);
+}
+
+void SpinLock::unlock() {
+  OSSpinLockUnlock(&m->lock_);
+}
+
+
+class ThreadBarrierPrivate {
+public:
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+  int count;
+  int tripCount;
+
+  inline explicit ThreadBarrierPrivate(int cnt):count(0), tripCount(cnt) {
+    CHECK_NE(cnt, 0);
+    CHECK_GE(pthread_mutex_init(&mutex, 0), 0);
+    CHECK_GE(pthread_cond_init(&cond, 0), 0);
+  }
+
+  inline ~ThreadBarrierPrivate() {
+    pthread_cond_destroy(&cond);
+    pthread_mutex_destroy(&mutex);
+  }
+
+  /**
+   * @brief wait
+   * @return true if the last wait
+   */
+  inline bool wait() {
+    pthread_mutex_lock(&mutex);
+    ++count;
+    if (count >= tripCount) {
+      count = 0;
+      pthread_cond_broadcast(&cond);
+      pthread_mutex_unlock(&mutex);
+      return true;
+    } else {
+      pthread_cond_wait(&cond, &mutex);
+      pthread_mutex_unlock(&mutex);
+      return false;
+    }
+  }
+};
+
+ThreadBarrier::ThreadBarrier(int count): m(new ThreadBarrierPrivate(count)) {}
+ThreadBarrier::~ThreadBarrier() { delete m; }
+void ThreadBarrier::wait() { m->wait(); }
+
+}  // namespace paddle
diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt
index 147ee3f6d6d86775f2f8c7839c79180f1daffa76..5b31cd393dd1fc319be0ae9a5811f5637617e08d 100644
--- a/paddle/utils/tests/CMakeLists.txt
+++ b/paddle/utils/tests/CMakeLists.txt
@@ -2,3 +2,15 @@ add_simple_unittest(test_CommandLineParser)
 add_simple_unittest(test_Logging)
 add_simple_unittest(test_Thread)
 add_simple_unittest(test_StringUtils)
+add_simple_unittest(test_CustomStackTrace)
+
+add_executable(
+    test_CustomStackTracePrint
+    test_CustomStackTracePrint.cpp
+)
+link_paddle_exe(test_CustomStackTracePrint)
+if(NOT APPLE)
+    add_test(NAME test_CustomStackTracePrint
+        COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..26ca4c678a650df50d372b0fbb4c3e03d52f91df
--- /dev/null
+++ b/paddle/utils/tests/test_CustomStackTrace.cpp
@@ -0,0 +1,95 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <thread>
+
+#include "paddle/utils/CustomStackTrace.h"
+#include "paddle/utils/CommandLineParser.h"
+#include "paddle/utils/Util.h"
+#include "paddle/utils/Locks.h"
+
+P_DEFINE_int32(test_thread_num, 10, "testing thread number");
+
+void testNormalImpl(const std::function<void(paddle::CustomStackTrace<std::string>&,
+                        size_t, size_t,
+                        paddle::ThreadBarrier&,
+                        paddle::ThreadBarrier&)>& callback) {
+  paddle::CustomStackTrace<std::string> tracer;
+  paddle::ThreadBarrier doneBarrier(FLAGS_test_thread_num + 1);
+  paddle::ThreadBarrier startBarrier(FLAGS_test_thread_num + 1);
+  constexpr size_t countDown = 10;
+  constexpr size_t layerSize = 1000;
+  std::vector<std::unique_ptr<std::thread>> threads;
+  threads.reserve(FLAGS_test_thread_num);
+
+  for (int32_t i=0; i < FLAGS_test_thread_num; ++i) {
+    threads.emplace_back(new std::thread([&tracer, &countDown, &layerSize,
+                                          &startBarrier, &doneBarrier,
+                                          &callback]{
+      callback(tracer, countDown, layerSize, startBarrier, doneBarrier);
+    }));
+  }
+  size_t cntDown = countDown;
+  while (cntDown-- > 0) {
+    startBarrier.wait();
+    doneBarrier.wait();
+    ASSERT_TRUE(tracer.empty());
+  }
+
+  for (auto& thread : threads) {
+    thread->join();
+  }
+}
+
+
+TEST(CustomStackTrace, normalTrain) {
+  testNormalImpl([](paddle::CustomStackTrace<std::string>& tracer,
+      size_t countDown, size_t layerSize,
+      paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){
+    while (countDown-- > 0) {
+      start.wait();
+      for (size_t i=0; i < layerSize; ++i) {
+        tracer.push("layer_" + std::to_string(i));
+      }
+      tracer.pop("");
+      for (size_t i=0; i < layerSize; ++i) {
+        tracer.pop("layer_" + std::to_string(layerSize - 1 - i));
+      }
+      finish.wait();
+    }
+  });
+}
+
+TEST(CustomStackTrace, normalTest) {
+  testNormalImpl([] (paddle::CustomStackTrace<std::string>& tracer,
+      size_t countDown, size_t layerSize,
+      paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){
+    while (countDown-- > 0) {
+      start.wait();
+      for (size_t i=0; i < layerSize; ++i) {
+        tracer.push("layer_" + std::to_string(i));
+      }
+      tracer.clear();  // in forward test, tracer will clear after forward.
+      finish.wait();
+    }
+  });
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  paddle::initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/utils/tests/test_CustomStackTracePrint.cpp b/paddle/utils/tests/test_CustomStackTracePrint.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c19c98614e6a7d6285990aa19849131579f7307b
--- /dev/null
+++ b/paddle/utils/tests/test_CustomStackTracePrint.cpp
@@ -0,0 +1,29 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/utils/Util.h"
+#include "paddle/utils/CustomStackTrace.h"
+
+int main(int argc, char** argv) {
+  paddle::initMain(argc, argv);
+
+  for (size_t i=0; i < 1000; ++i) {
+    paddle::gLayerStackTrace.push("layer_" + std::to_string(i));
+    if (i == 998) {
+      throw "Unhandled exception";
+    }
+  }
+
+  return 0;
+}
diff --git a/paddle/utils/tests/test_CustomStackTracePrint.sh b/paddle/utils/tests/test_CustomStackTracePrint.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b5543485f365adee49629578d470a14e0c742547
--- /dev/null
+++ b/paddle/utils/tests/test_CustomStackTracePrint.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+echo "Test Custom Stack Trace print correct result when fail"
+./test_CustomStackTracePrint >customStackTraceLog 2>&1
+if [ $? -eq 0 ]; then
+  exit 1
+else
+  set -e
+  TEXT=""
+  for ((i=0; i<=998; i++))
+  do
+    TEXT="layer_$i, "$TEXT
+  done
+  TEXT="Forwarding "$TEXT
+  grep -q "$TEXT" customStackTraceLog
+fi
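
Note on the CustomStackTrace rework in this patch: the class now keeps one std::stack per thread through ThreadLocal, registers every thread's stack in a mutex-guarded map, and dump() walks either all registered stacks or only the calling thread's. The standalone sketch below illustrates that per-thread bookkeeping with nothing but the standard library; the class and member names are hypothetical, and it takes a single mutex around every operation instead of the patch's ThreadLocal scheme, which only locks when a thread registers its stack for the first time or when dump() runs.

// Simplified illustration (not the Paddle class): each thread gets its own
// stack inside a shared registry, and a failure handler can dump them all.
#include <iostream>
#include <mutex>
#include <stack>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

class ThreadStackRegistry {  // hypothetical name
public:
  void push(const std::string& item) {
    std::lock_guard<std::mutex> g(mtx_);
    stacks_[std::this_thread::get_id()].push(item);
  }
  void pop(const std::string& item) {
    std::lock_guard<std::mutex> g(mtx_);
    auto& s = stacks_[std::this_thread::get_id()];
    if (!s.empty() && s.top() == item) {
      s.pop();
    }
  }
  // Dump and clear every thread's stack, newest entry first.
  void dump(std::ostream& os) {
    std::lock_guard<std::mutex> g(mtx_);
    for (auto& kv : stacks_) {
      os << "Thread [" << kv.first << "] ";
      while (!kv.second.empty()) {
        os << kv.second.top() << ", ";
        kv.second.pop();
      }
      os << "\n";
    }
  }

private:
  std::mutex mtx_;
  std::unordered_map<std::thread::id, std::stack<std::string>> stacks_;
};

int main() {
  ThreadStackRegistry trace;
  std::vector<std::thread> workers;
  for (int t = 0; t < 2; ++t) {
    workers.emplace_back([&trace, t] {
      for (int i = 0; i < 3; ++i) {
        trace.push("layer_" + std::to_string(t) + "_" + std::to_string(i));
      }
    });
  }
  for (auto& w : workers) {
    w.join();
  }
  trace.dump(std::cerr);  // e.g. "Thread [...] layer_0_2, layer_0_1, layer_0_0,"
  return 0;
}

The real class additionally records whether the last action on each thread's stack was a push, which is what lets installLayerStackTracer() label a dumped stack as "Forwarding" or "Backwarding".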
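
The Locks.h rewrite hides every platform primitive behind an opaque pointer: SpinLock, Semaphore and ThreadBarrier now hold only a SpinLockPrivate* / SemaphorePrivate* / ThreadBarrierPrivate*, with the pthread-based definitions in arch/linux/Locks.cpp and the dispatch-semaphore/OSSpinLock definitions in arch/osx/Locks.cpp. Below is a minimal sketch of that pimpl split with hypothetical names and std::mutex standing in for the platform backend; it illustrates the pattern, not the project's code.

// Pimpl sketch: the public type exposes no platform headers; each platform
// provides its own Private definition in a separate translation unit.
#include <mutex>

// --- what would normally sit in a header (names are hypothetical) ---
class MyLockPrivate;  // only declared here
class MyLock {
public:
  MyLock();
  ~MyLock();
  MyLock(const MyLock&) = delete;
  MyLock& operator=(const MyLock&) = delete;
  void lock();
  void unlock();

private:
  MyLockPrivate* m;  // opaque pointer, like the `m` member in Locks.h
};

// --- one possible backend (would normally sit in a per-platform .cpp) ---
class MyLockPrivate {
public:
  std::mutex mtx;
};

MyLock::MyLock() : m(new MyLockPrivate()) {}
MyLock::~MyLock() { delete m; }
void MyLock::lock() { m->mtx.lock(); }
void MyLock::unlock() { m->mtx.unlock(); }

int main() {
  MyLock lk;
  lk.lock();    // MyLock satisfies BasicLockable, so std::lock_guard<MyLock>
  lk.unlock();  // would also work here.
  return 0;
}

The cost of the pattern is one heap allocation and an extra indirection per object; the benefit, as in this patch, is that Locks.h no longer needs #ifdef __APPLE__ blocks or a hand-rolled pthread_barrier emulation in the header.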
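
On the posix_memalign changes in Allocator.h and the tests: unlike malloc, posix_memalign reports failure through its return value (0 on success, an error number such as EINVAL or ENOMEM otherwise) and leaves the output pointer unspecified on failure, so wrapping the call in CHECK_EQ(..., 0) is the reliable way to catch a failed aligned allocation. A small standalone sketch of the same check, using a plain conditional in place of glog's CHECK_EQ:

#include <stdio.h>
#include <stdlib.h>

int main() {
  void* ptr = nullptr;
  // 32-byte alignment (a power of two and a multiple of sizeof(void*)),
  // 1 KiB block; the return code is the only trustworthy failure signal.
  int rc = posix_memalign(&ptr, 32, 1024);
  if (rc != 0) {
    fprintf(stderr, "posix_memalign failed with error %d\n", rc);
    return 1;
  }
  printf("aligned block at %p\n", ptr);
  free(ptr);
  return 0;
}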