From c927939b9fa0ad8fd839650330e07ae396751d8f Mon Sep 17 00:00:00 2001 From: willzhang4a58 Date: Tue, 13 Jun 2017 11:17:59 +0800 Subject: [PATCH] add register_gpu_kernel, cpu_channel --- oneflow/core/actor/actor.cpp | 2 +- oneflow/core/actor/actor.h | 2 +- oneflow/core/actor/boxing_actor.cpp | 4 ++-- oneflow/core/actor/boxing_actor.h | 2 +- oneflow/core/actor/copy_actor.cpp | 2 +- oneflow/core/actor/copy_actor.h | 2 +- oneflow/core/actor/copy_comm_net_actor.cpp | 2 +- oneflow/core/actor/copy_hd_actor.cpp | 2 +- oneflow/core/actor/fw_data_comp_actor.cpp | 4 ++-- oneflow/core/actor/fw_data_comp_actor.h | 2 +- oneflow/core/actor/model_update_comp_actor.cpp | 10 +++++----- oneflow/core/actor/model_update_comp_actor.h | 10 +++++----- oneflow/core/kernel/convolution_kernel.cpp | 6 +++--- oneflow/core/kernel/convolution_kernel.cu | 6 ++++-- oneflow/core/kernel/convolution_kernel.h | 8 ++++---- oneflow/core/kernel/kernel.cpp | 2 +- oneflow/core/kernel/kernel.h | 13 +++++-------- oneflow/core/kernel/kernel_context.h | 16 ++++++++++++++++ oneflow/core/kernel/kernel_manager.h | 6 ++++-- 19 files changed, 59 insertions(+), 42 deletions(-) create mode 100644 oneflow/core/kernel/kernel_context.h diff --git a/oneflow/core/actor/actor.cpp b/oneflow/core/actor/actor.cpp index 7a209da2e3..80469c0948 100644 --- a/oneflow/core/actor/actor.cpp +++ b/oneflow/core/actor/actor.cpp @@ -44,7 +44,7 @@ void Actor::Init(const TaskProto& task_proto) { } void Actor::WardKernel( - const KernelContext& kernel_ctx, + const KernelCtx& kernel_ctx, std::function(uint64_t)> Regst4RegstDescId) { for (const ExecKernel& ek : exec_kernel_vec_) { (ek.kernel->*ward_func_)(kernel_ctx, [&](const std::string& bn_in_op) { diff --git a/oneflow/core/actor/actor.h b/oneflow/core/actor/actor.h index 6398500977..9ef7c4f326 100644 --- a/oneflow/core/actor/actor.h +++ b/oneflow/core/actor/actor.h @@ -34,7 +34,7 @@ class Actor { Actor() = default; void WardKernel( - const KernelContext& kernel_ctx, + const KernelCtx& kernel_ctx, std::function(uint64_t)> Regst4RegstDescId); void ForEachProducedRegst(std::function); uint64_t RegstDescId4Name(const std::string& name) const { diff --git a/oneflow/core/actor/boxing_actor.cpp b/oneflow/core/actor/boxing_actor.cpp index a24f337970..cd1f9f16d1 100644 --- a/oneflow/core/actor/boxing_actor.cpp +++ b/oneflow/core/actor/boxing_actor.cpp @@ -11,7 +11,7 @@ void BoxingActor::Init(const TaskProto& task_proto) { void BoxingActor::ProcessMsg(const ActorMsg& msg, const ThreadContext& thread_ctx) { - KernelContext kernel_ctx; + KernelCtx kernel_ctx; if (TryUpdtStateAsFromRegstReader(msg.regst_warpper()->regst_raw_ptr()) != 0) { std::shared_ptr regst_wp = msg.regst_warpper(); auto waiting_in_regst_it = waiting_in_regst_.find(regst_wp->piece_id()); @@ -36,7 +36,7 @@ void BoxingActor::ProcessMsg(const ActorMsg& msg, } } -void BoxingActor::WardKernelAndSendMsg(const KernelContext& kernel_ctx) { +void BoxingActor::WardKernelAndSendMsg(const KernelCtx& kernel_ctx) { uint64_t piece_id = ready_in_regst_.front().first; WardKernel(kernel_ctx, [this](uint64_t regst_desc_id) -> std::shared_ptr { Regst* regst = GetCurWriteableRegst(regst_desc_id); diff --git a/oneflow/core/actor/boxing_actor.h b/oneflow/core/actor/boxing_actor.h index 7135710f52..af8d52e027 100644 --- a/oneflow/core/actor/boxing_actor.h +++ b/oneflow/core/actor/boxing_actor.h @@ -18,7 +18,7 @@ class BoxingActor final : public Actor { using RDescId2RwMap = HashMap>; using RDescId2RwMapPtr = std::unique_ptr; - void WardKernelAndSendMsg(const KernelContext&); + void WardKernelAndSendMsg(const KernelCtx&); // HashMap waiting_in_regst_; diff --git a/oneflow/core/actor/copy_actor.cpp b/oneflow/core/actor/copy_actor.cpp index 56a0ca9f77..8e23f1e364 100644 --- a/oneflow/core/actor/copy_actor.cpp +++ b/oneflow/core/actor/copy_actor.cpp @@ -9,7 +9,7 @@ void CopyActor::Init(const TaskProto& task_proto) { } void CopyActor::ProcessMsgWithKernelCtx(const ActorMsg& msg, - const KernelContext& kernel_ctx) { + const KernelCtx& kernel_ctx) { if (TryUpdtStateAsFromRegstReader(msg.regst_warpper()->regst_raw_ptr()) != 0) { waiting_in_regst_.push(std::move(msg.regst_warpper())); } diff --git a/oneflow/core/actor/copy_actor.h b/oneflow/core/actor/copy_actor.h index d12a4848a3..b26be0260a 100644 --- a/oneflow/core/actor/copy_actor.h +++ b/oneflow/core/actor/copy_actor.h @@ -15,7 +15,7 @@ public: protected: CopyActor() = default; - void ProcessMsgWithKernelCtx(const ActorMsg& msg, const KernelContext& kernel_ctx); + void ProcessMsgWithKernelCtx(const ActorMsg& msg, const KernelCtx& kernel_ctx); private: std::queue> waiting_in_regst_; diff --git a/oneflow/core/actor/copy_comm_net_actor.cpp b/oneflow/core/actor/copy_comm_net_actor.cpp index ee901ec028..2e72be96d1 100644 --- a/oneflow/core/actor/copy_comm_net_actor.cpp +++ b/oneflow/core/actor/copy_comm_net_actor.cpp @@ -6,7 +6,7 @@ namespace oneflow { void CopyCommNetActor::ProcessMsg(const ActorMsg& msg, const ThreadContext&) { - KernelContext kernel_ctx; + KernelCtx kernel_ctx; ProcessMsgWithKernelCtx(msg, kernel_ctx); } diff --git a/oneflow/core/actor/copy_hd_actor.cpp b/oneflow/core/actor/copy_hd_actor.cpp index c09200a813..87398e71f1 100644 --- a/oneflow/core/actor/copy_hd_actor.cpp +++ b/oneflow/core/actor/copy_hd_actor.cpp @@ -6,7 +6,7 @@ namespace oneflow { void CopyHdActor::ProcessMsg(const ActorMsg& msg, const ThreadContext& thread_ctx) { - KernelContext kernel_ctx; + KernelCtx kernel_ctx; kernel_ctx.cuda_stream = thread_ctx.copy_hd_cuda_stream; ProcessMsgWithKernelCtx(msg, kernel_ctx); } diff --git a/oneflow/core/actor/fw_data_comp_actor.cpp b/oneflow/core/actor/fw_data_comp_actor.cpp index a8c9aea800..126b8d549e 100644 --- a/oneflow/core/actor/fw_data_comp_actor.cpp +++ b/oneflow/core/actor/fw_data_comp_actor.cpp @@ -25,7 +25,7 @@ bool FwDataCompActor::IsReadReady() { void FwDataCompActor::ProcessMsg(const ActorMsg& msg, const ThreadContext& thread_ctx) { - KernelContext kernel_ctx; + KernelCtx kernel_ctx; kernel_ctx.cuda_stream = thread_ctx.compute_cuda_stream; if (msg.msg_type() == ActorMsgType::kCmdMsg) { TODO(); @@ -55,7 +55,7 @@ void FwDataCompActor::ProcessMsg(const ActorMsg& msg, } } -void FwDataCompActor::WardKernelAndSendMsg(const KernelContext& kernel_ctx) { +void FwDataCompActor::WardKernelAndSendMsg(const KernelCtx& kernel_ctx) { CHECK_EQ(in_.front()->piece_id(), expected_piece_id()); ready_in_regst_[in_.front()->regst_desc_id()] = in_.front(); uint64_t piece_id = in_.front()->piece_id(); diff --git a/oneflow/core/actor/fw_data_comp_actor.h b/oneflow/core/actor/fw_data_comp_actor.h index e088a5ba75..17362eeace 100644 --- a/oneflow/core/actor/fw_data_comp_actor.h +++ b/oneflow/core/actor/fw_data_comp_actor.h @@ -16,7 +16,7 @@ public: private: bool IsReadReady(); - void WardKernelAndSendMsg(const KernelContext&); + void WardKernelAndSendMsg(const KernelCtx&); uint64_t expected_model_version_id_; uint64_t model_regst_desc_id_; diff --git a/oneflow/core/actor/model_update_comp_actor.cpp b/oneflow/core/actor/model_update_comp_actor.cpp index d417b2f488..b78190f592 100644 --- a/oneflow/core/actor/model_update_comp_actor.cpp +++ b/oneflow/core/actor/model_update_comp_actor.cpp @@ -12,14 +12,14 @@ void MdUpdtCompActor::Init(const TaskProto& task_proto) { void MdUpdtCompActor::ProcessMsg(const ActorMsg& actor_msg, const ThreadContext& thread_ctx) { - KernelContext kernel_ctx; + KernelCtx kernel_ctx; kernel_ctx.cuda_stream = thread_ctx.compute_cuda_stream; (this->*cur_handle_)(actor_msg, kernel_ctx); } void MdUpdtCompActor::HandleBeforeInitializeModel( const ActorMsg& actor_msg, - const KernelContext& kernel_ctx) { + const KernelCtx& kernel_ctx) { CHECK(actor_msg.actor_cmd() == ActorCmd::kInitializeModel); Regst* model_regst = GetCurWriteableRegst(model_regst_desc_id_); model_regst->set_model_version_id(0); @@ -46,7 +46,7 @@ void MdUpdtCompActor::HandleBeforeInitializeModel( void MdUpdtCompActor::HandleBeforeSendInitialModel( const ActorMsg& actor_msg, - const KernelContext& kernel_ctx) { + const KernelCtx& kernel_ctx) { CHECK(actor_msg.actor_cmd() == ActorCmd::kSendInitialModel); CurWriteDone(); SetReadOnlyForRegstDescId(model_tmp_regst_desc_id_); @@ -55,7 +55,7 @@ void MdUpdtCompActor::HandleBeforeSendInitialModel( void MdUpdtCompActor::HandleForUpdateModel( const ActorMsg& actor_msg, - const KernelContext& kernel_ctx) { + const KernelCtx& kernel_ctx) { if (actor_msg.msg_type() == ActorMsgType::kCmdMsg) { CHECK(actor_msg.actor_cmd() == ActorCmd::kStop); TODO(); @@ -69,7 +69,7 @@ void MdUpdtCompActor::HandleForUpdateModel( void MdUpdtCompActor::ProcessRegstFromMsg( std::shared_ptr regst_warpper, - const KernelContext& kernel_ctx) { + const KernelCtx& kernel_ctx) { if (TryUpdtStateAsFromRegstReader(regst_warpper->regst_raw_ptr()) != 0) { waiting_model_diff_acc_queue_.push(regst_warpper); } diff --git a/oneflow/core/actor/model_update_comp_actor.h b/oneflow/core/actor/model_update_comp_actor.h index 704810637b..579765b961 100644 --- a/oneflow/core/actor/model_update_comp_actor.h +++ b/oneflow/core/actor/model_update_comp_actor.h @@ -15,13 +15,13 @@ class MdUpdtCompActor final : public CompActor { void ProcessMsg(const ActorMsg&, const ThreadContext&) override; private: - void HandleBeforeInitializeModel(const ActorMsg&, const KernelContext&); - void HandleBeforeSendInitialModel(const ActorMsg&, const KernelContext&); - void HandleForUpdateModel(const ActorMsg&, const KernelContext&); + void HandleBeforeInitializeModel(const ActorMsg&, const KernelCtx&); + void HandleBeforeSendInitialModel(const ActorMsg&, const KernelCtx&); + void HandleForUpdateModel(const ActorMsg&, const KernelCtx&); - void ProcessRegstFromMsg(std::shared_ptr, const KernelContext&); + void ProcessRegstFromMsg(std::shared_ptr, const KernelCtx&); - void (MdUpdtCompActor::*cur_handle_)(const ActorMsg&, const KernelContext&); + void (MdUpdtCompActor::*cur_handle_)(const ActorMsg&, const KernelCtx&); uint64_t model_regst_desc_id_; uint64_t model_tmp_regst_desc_id_; std::queue> waiting_model_diff_acc_queue_; diff --git a/oneflow/core/kernel/convolution_kernel.cpp b/oneflow/core/kernel/convolution_kernel.cpp index 463231bc4b..017ba0bd93 100644 --- a/oneflow/core/kernel/convolution_kernel.cpp +++ b/oneflow/core/kernel/convolution_kernel.cpp @@ -5,19 +5,19 @@ namespace oneflow { template void ConvolutionKernel::Forward( - const KernelContext&, + const KernelCtx&, std::function bn_in_op2blob_ptr) const { TODO(); } template void ConvolutionKernel::Backward( - const KernelContext&, + const KernelCtx&, std::function bn_in_op2blob_ptr) const { TODO(); } INSTANTIATE_CPU_KERNEL_CLASS(ConvolutionKernel); -REGISTER_KERNEL(OperatorConf::kConvolutionConf, ConvolutionKernel); +REGISTER_CPU_KERNEL(OperatorConf::kConvolutionConf, ConvolutionKernel); } // namespace oneflow diff --git a/oneflow/core/kernel/convolution_kernel.cu b/oneflow/core/kernel/convolution_kernel.cu index 3f6d0a837e..7febef1dfd 100644 --- a/oneflow/core/kernel/convolution_kernel.cu +++ b/oneflow/core/kernel/convolution_kernel.cu @@ -5,18 +5,20 @@ namespace oneflow { template void ConvolutionKernel::Forward( - const KernelContext&, + const KernelCtx&, std::function bn_in_op2blob_ptr) const { TODO(); } template void ConvolutionKernel::Backward( - const KernelContext&, + const KernelCtx&, std::function bn_in_op2blob_ptr) const { TODO(); } INSTANTIATE_GPU_KERNEL_CLASS(ConvolutionKernel); +REGISTER_GPU_KERNEL(OperatorConf::kConvolutionConf, ConvolutionKernel); + } // namespace oneflow diff --git a/oneflow/core/kernel/convolution_kernel.h b/oneflow/core/kernel/convolution_kernel.h index d4936b3b8f..9e2f6c55bf 100644 --- a/oneflow/core/kernel/convolution_kernel.h +++ b/oneflow/core/kernel/convolution_kernel.h @@ -19,8 +19,8 @@ class ConvolutionKernel final : public Ke ConvolutionKernel() = default; ~ConvolutionKernel() = default; - void Forward(const KernelContext&, std::function) const override; - void Backward(const KernelContext&, std::function) const override; + void Forward(const KernelCtx&, std::function) const override; + void Backward(const KernelCtx&, std::function) const override; }; template @@ -30,8 +30,8 @@ class ConvolutionKernel final : public Ke ConvolutionKernel() = default; ~ConvolutionKernel() = default; - void Forward(const KernelContext&, std::function) const override; - void Backward(const KernelContext&, std::function) const override; + void Forward(const KernelCtx&, std::function) const override; + void Backward(const KernelCtx&, std::function) const override; }; } // namespace oneflow diff --git a/oneflow/core/kernel/kernel.cpp b/oneflow/core/kernel/kernel.cpp index 507e5b1523..c1e20defc7 100644 --- a/oneflow/core/kernel/kernel.cpp +++ b/oneflow/core/kernel/kernel.cpp @@ -9,7 +9,7 @@ void Kernel::InitFromOpProto(const OperatorProto& op_proto) { } void Kernel::InitModelAndModelTmpBlobs( - const KernelContext& ctx, + const KernelCtx& ctx, std::function Blob4BnInOp) const { TODO(); } diff --git a/oneflow/core/kernel/kernel.h b/oneflow/core/kernel/kernel.h index 5548ce2e27..12d888e71b 100644 --- a/oneflow/core/kernel/kernel.h +++ b/oneflow/core/kernel/kernel.h @@ -9,13 +9,10 @@ #include "oneflow/core/operator/operator.h" #include "oneflow/core/operator/operator_manager.h" #include "oneflow/core/operator/operator.pb.h" +#include "oneflow/core/kernel/kernel_context.h" namespace oneflow { -struct KernelContext { - const cudaStream_t* cuda_stream; -}; - class Kernel { public: OF_DISALLOW_COPY_AND_MOVE(Kernel); @@ -24,17 +21,17 @@ class Kernel { void InitFromOpProto(const OperatorProto& op_proto); void InitModelAndModelTmpBlobs( - const KernelContext& ctx, + const KernelCtx& ctx, std::function Blob4BnInOp) const; // for Forward / Bp Calculation in FwExecGragh node and BpExecGragh node // through bn_in_op2blob_ptr function get the input blob and output blob // the Kernel will using the input blob calculate the result and fill output virtual void Forward( - const KernelContext& ctx, + const KernelCtx& ctx, std::function) const = 0; virtual void Backward( - const KernelContext& ctx, + const KernelCtx& ctx, std::function) const = 0; // @@ -51,7 +48,7 @@ class Kernel { }; using KernelWardFunc = void (Kernel::*)( - const KernelContext&, std::function) const; + const KernelCtx&, std::function) const; #define INSTANTIATE_CPU_KERNEL_CLASS(classname) \ char gInstantiationGuardCPU##classname; \ diff --git a/oneflow/core/kernel/kernel_context.h b/oneflow/core/kernel/kernel_context.h new file mode 100644 index 0000000000..f567fa4346 --- /dev/null +++ b/oneflow/core/kernel/kernel_context.h @@ -0,0 +1,16 @@ +#ifndef ONEFLOW_CORE_KERNEL_KERNEL_CONTEXT_H_ +#define ONEFLOW_CORE_KERNEL_KERNEL_CONTEXT_H_ + +#include "oneflow/core/common/util.h" +#include "oneflow/core/common/channel.h" + +namespace oneflow { + +struct KernelCtx { + Channel>* cpu_channel; + const cudaStream_t* cuda_stream; +}; + +} // namespace oneflow + +#endif // ONEFLOW_CORE_KERNEL_KERNEL_CONTEXT_H_ diff --git a/oneflow/core/kernel/kernel_manager.h b/oneflow/core/kernel/kernel_manager.h index 70ec21719a..7449813121 100644 --- a/oneflow/core/kernel/kernel_manager.h +++ b/oneflow/core/kernel/kernel_manager.h @@ -67,9 +67,11 @@ struct GpuDoubleKernelRegister { } }; -#define REGISTER_KERNEL(OpTypeCase, KernelType) \ +#define REGISTER_CPU_KERNEL(OpTypeCase, KernelType) \ static CpuFloatKernelRegister> g_##KernelType##_cpu_float_regst_var; \ - static CpuDoubleKernelRegister> g_##KernelType##_cpu_double_regst_var; \ + static CpuDoubleKernelRegister> g_##KernelType##_cpu_double_regst_var; + +#define REGISTER_GPU_KERNEL(OpTypeCase, KernelType) \ static GpuFloatKernelRegister> g_##KernelType##_gpu_float_regst_var; \ static GpuDoubleKernelRegister> g_##KernelType##_gpu_double_regst_var; -- GitLab