// stream_callback_manager.h
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cuda.h>
#include <cuda_runtime.h>
#include <functional>
#include <memory>
#include <utility>
#include "ThreadPool.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace platform {

class StreamCallbackManager;

// Payload handed to the CUDA runtime as the `user_data` pointer of a stream
// callback. It bundles the user's callable with a pointer to the manager
// that registered it.
struct StreamCallbackContext {
  // Stores `manager` and type-erases `callback` into a std::function.
  //
  // manager:  manager that registered the callback; not owned.
  // callback: any callable invocable as `void()`. It is perfectly forwarded,
  //           so an rvalue callable is moved into `callback_` rather than
  //           copied, and an lvalue callable is copied as before.
  template <typename Callback>
  inline StreamCallbackContext(const StreamCallbackManager *manager,
                               Callback &&callback)
      : manager_(manager), callback_(std::forward<Callback>(callback)) {}

  const StreamCallbackManager *manager_;  // do not own
  std::function<void()> callback_;        // user callable to run later
};

class StreamCallbackManager {
 public:
  explicit inline StreamCallbackManager(cudaStream_t stream = nullptr)
      : stream_(stream), thread_pool_(new ThreadPool(1)) {}

  template <typename Callback>
  inline void AddCallback(Callback &&callback) const {
S
sneaxiy 已提交
46 47 48
    auto *stream_callback_context =
        new StreamCallbackContext(this, std::forward<Callback>(callback));
#if CUDA_VERSION >= 10000
P
peizhilin 已提交
49 50 51
    PADDLE_ENFORCE(cudaLaunchHostFunc(stream_,
                                      StreamCallbackManager::StreamCallbackFunc,
                                      stream_callback_context));  // NOLINT
S
sneaxiy 已提交
52
#else
P
peizhilin 已提交
53 54 55
    PADDLE_ENFORCE(cudaStreamAddCallback(
        stream_, StreamCallbackManager::StreamCallbackFunc,
        stream_callback_context, 0));  // NOLINT
S
sneaxiy 已提交
56
#endif
S
sneaxiy 已提交
57 58 59 60 61 62 63 64
  }

  void Wait() const { thread_pool_.reset(new ThreadPool(1)); }

 private:
  const cudaStream_t stream_;
  mutable std::unique_ptr<ThreadPool> thread_pool_;

S
sneaxiy 已提交
65 66 67 68 69
// cudaStreamCallback cannot call CUDA API inside, so we have to use
// thread_pool here
#if CUDA_VERSION >= 10000
  static void CUDART_CB StreamCallbackFunc(void *user_data)
#else
S
sneaxiy 已提交
70
  static void CUDART_CB StreamCallbackFunc(cudaStream_t stream,
S
sneaxiy 已提交
71 72 73
                                           cudaError_t status, void *user_data)
#endif
  {
S
sneaxiy 已提交
74 75 76 77 78
    auto *callback_context_ptr =
        reinterpret_cast<StreamCallbackContext *>(user_data);
    callback_context_ptr->manager_->thread_pool_->enqueue([=]() {
      std::unique_ptr<StreamCallbackContext> callback_context(
          callback_context_ptr);
S
sneaxiy 已提交
79
      callback_context->callback_();
S
sneaxiy 已提交
80 81 82 83 84 85
    });
  }
};

}  // namespace platform
}  // namespace paddle