stream_callback_manager.h 2.7 KB
Newer Older
S
sneaxiy 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cuda.h>
#include <cuda_runtime.h>
#include <functional>
#include <memory>
#include <utility>
#include "ThreadPool.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace platform {

// Type-erased host callback: a callable that receives a stream handle and a
// CUDA status code and returns nothing.
using StreamCallback = std::function<void(cudaStream_t, cudaError_t)>;

// Forward declaration: StreamCallbackContext stores a pointer to the manager
// before the class itself is defined.
class StreamCallbackManager;

// Bundle passed through the `void *user_data` slot of a CUDA stream callback:
// the manager that registered the callback plus the callable to run.
struct StreamCallbackContext {
  // manager:  the registering manager; stored as a non-owning pointer.
  // callback: any callable convertible to StreamCallback. It is perfectly
  //           forwarded, so an rvalue callable is moved into callback_
  //           instead of being copied.
  template <typename Callback>
  inline StreamCallbackContext(const StreamCallbackManager *manager,
                               Callback &&callback)
      : manager_(manager), callback_(std::forward<Callback>(callback)) {}

  const StreamCallbackManager *manager_;  // do not own
  StreamCallback callback_;
};

class StreamCallbackManager {
 public:
  explicit inline StreamCallbackManager(cudaStream_t stream = nullptr)
      : stream_(stream), thread_pool_(new ThreadPool(1)) {}

  template <typename Callback>
  inline void AddCallback(Callback &&callback) const {
    AddCallbackWithStreamAndErrorInfo(
        [=](cudaStream_t, cudaError_t) { callback(); });
  }

  template <typename Callback>
  inline void AddCallbackWithStreamAndErrorInfo(Callback &&callback) const {
    auto *stream_callback_context = new StreamCallbackContext(this, callback);
    PADDLE_ENFORCE(cudaStreamAddCallback(
        stream_, StreamCallbackManager::StreamCallbackFunc,
        stream_callback_context, 0));
  }

  void Wait() const { thread_pool_.reset(new ThreadPool(1)); }

 private:
  const cudaStream_t stream_;
  mutable std::unique_ptr<ThreadPool> thread_pool_;

  // cudaStreamCallback cannot call CUDA API inside, so we have to use
  // thread_pool here
  static void CUDART_CB StreamCallbackFunc(cudaStream_t stream,
                                           cudaError_t status,
                                           void *user_data) {
    auto *callback_context_ptr =
        reinterpret_cast<StreamCallbackContext *>(user_data);
    callback_context_ptr->manager_->thread_pool_->enqueue([=]() {
      std::unique_ptr<StreamCallbackContext> callback_context(
          callback_context_ptr);
      callback_context->callback_(stream, status);
    });
  }
};

}  // namespace platform
}  // namespace paddle