From fa5060a2488e24f90def1174b88384c8f519130f Mon Sep 17 00:00:00 2001 From: Juncheng Date: Thu, 26 Nov 2020 10:25:32 +0800 Subject: [PATCH] Remove NormalModelUpdateOpConf (#3917) * Remove NormalModelUpdateOpConf * Remove useless import * Remove TaskNode/Actor * fix Former-commit-id: bab1c62adadc08138311600bde3b041a060a0e61 --- .../actor/normal_forward_compute_actor.cpp | 1 - oneflow/core/graph/logical_node.cpp | 7 -- oneflow/core/graph/logical_node.h | 7 -- oneflow/core/graph/op_graph.cpp | 1 - .../graph/optimizer_compute_task_node.cpp | 48 ------------- .../core/graph/optimizer_compute_task_node.h | 48 ------------- oneflow/core/job/task.proto | 1 - oneflow/core/job_rewriter/optimizer.cpp | 26 ------- oneflow/core/job_rewriter/optimizer.h | 4 -- oneflow/core/kernel/kernel.proto | 12 ---- .../kernel/normal_model_update_kernel.cpp | 40 ----------- .../core/kernel/normal_model_update_kernel.h | 65 ------------------ .../core/operator/normal_model_update_op.cpp | 68 ------------------- .../core/operator/normal_model_update_op.h | 55 --------------- oneflow/core/operator/op_conf.proto | 10 --- oneflow/xrt/launch_op.h | 2 - 16 files changed, 395 deletions(-) delete mode 100644 oneflow/core/graph/optimizer_compute_task_node.cpp delete mode 100644 oneflow/core/graph/optimizer_compute_task_node.h delete mode 100644 oneflow/core/kernel/normal_model_update_kernel.cpp delete mode 100644 oneflow/core/kernel/normal_model_update_kernel.h delete mode 100644 oneflow/core/operator/normal_model_update_op.cpp delete mode 100644 oneflow/core/operator/normal_model_update_op.h diff --git a/oneflow/core/actor/normal_forward_compute_actor.cpp b/oneflow/core/actor/normal_forward_compute_actor.cpp index be2f38f5a5..19df11afdb 100644 --- a/oneflow/core/actor/normal_forward_compute_actor.cpp +++ b/oneflow/core/actor/normal_forward_compute_actor.cpp @@ -67,7 +67,6 @@ void NormalForwardCompActor::AsyncInitModelAndConstBuf() { } REGISTER_ACTOR(TaskType::kNormalForward, NormalForwardCompActor); -REGISTER_ACTOR(TaskType::kOptimizer, NormalForwardCompActor); REGISTER_ACTOR(TaskType::kPrint, NormalForwardCompActor); REGISTER_ACTOR(TaskType::kForeignInput, NormalForwardCompActor); REGISTER_ACTOR(TaskType::kForeignOutput, NormalForwardCompActor); diff --git a/oneflow/core/graph/logical_node.cpp b/oneflow/core/graph/logical_node.cpp index f38c9b5db2..c599770f2a 100644 --- a/oneflow/core/graph/logical_node.cpp +++ b/oneflow/core/graph/logical_node.cpp @@ -15,7 +15,6 @@ limitations under the License. */ #include "oneflow/core/graph/logical_node.h" #include "oneflow/core/graph/normal_forward_compute_task_node.h" -#include "oneflow/core/graph/optimizer_compute_task_node.h" #include "oneflow/core/graph/print_compute_task_node.h" #include "oneflow/core/graph/decode_compute_task_node.h" #include "oneflow/core/graph/decode_random_compute_task_node.h" @@ -294,12 +293,6 @@ int64_t NormalForwardLogicalNode::GetAreaId() const { } } -std::string OptimizerLogicalNode::TypeName() const { return "Optimizer"; } - -CompTaskNode* OptimizerLogicalNode::NewCompTaskNode() const { return new OptimizerCompTaskNode; } - -int64_t OptimizerLogicalNode::GetAreaId() const { return kMdUpdtArea; } - int64_t NewAreaId() { static int64_t next_area_id = AreaType_ARRAYSIZE; return ++next_area_id; diff --git a/oneflow/core/graph/logical_node.h b/oneflow/core/graph/logical_node.h index e7dd3f4f4b..eb142d6dd4 100644 --- a/oneflow/core/graph/logical_node.h +++ b/oneflow/core/graph/logical_node.h @@ -147,13 +147,6 @@ class NormalForwardLogicalNode final : public ForwardLogicalNode { private: }; -class OptimizerLogicalNode final : public ForwardLogicalNode { - public: - LOGICAL_NODE_BOILERPLATE(OptimizerLogicalNode); - - private: -}; - int64_t NewAreaId(); #define LOGICAL_NODE_WITH_NEW_AREA_ID_BOILERPLATE(name) \ diff --git a/oneflow/core/graph/op_graph.cpp b/oneflow/core/graph/op_graph.cpp index e36be43690..af80a06010 100644 --- a/oneflow/core/graph/op_graph.cpp +++ b/oneflow/core/graph/op_graph.cpp @@ -16,7 +16,6 @@ limitations under the License. #include "oneflow/core/graph/op_graph.h" #include "oneflow/core/job/job_builder.h" #include "oneflow/core/job/mirrored_sig_infer_hint.h" -#include "oneflow/core/operator/normal_model_update_op.h" namespace oneflow { diff --git a/oneflow/core/graph/optimizer_compute_task_node.cpp b/oneflow/core/graph/optimizer_compute_task_node.cpp deleted file mode 100644 index ffdc5f583c..0000000000 --- a/oneflow/core/graph/optimizer_compute_task_node.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#include "oneflow/core/graph/logical_node.h" -#include "oneflow/core/graph/optimizer_compute_task_node.h" - -namespace oneflow { - -void OptimizerCompTaskNode::ConsumeAllRegsts() { - ForEachInDataEdge([&](TaskEdge* edge) { - for (const auto& regst : edge->GetRegsts()) { ConsumeRegst("in", regst); } - }); -} - -void OptimizerCompTaskNode::ProduceAllRegstsAndBindEdges() { ProduceRegst("tmp", false, 1, 1); } - -void OptimizerCompTaskNode::BuildExecGphAndRegst() { - ExecNode* node = mut_exec_gph().NewNode(); - std::shared_ptr sole_op = this->logical_node()->SoleOp(); - node->mut_op() = sole_op; - const std::list>& in_regsts = GetConsumedRegst("in"); - for (const auto& ibn : node->op()->input_bns()) { - node->BindBnWithOneOfTheRegsts(ibn, in_regsts); - } - node->AddBnToRegstAndBindIt(&Operator::tmp_bns, GetProducedRegst("tmp")); - node->InferBlobDescs(parallel_ctx()); -} - -void OptimizerCompTaskNode::InferProducedDataRegstTimeShape() { - ForEachProducedDataRegst([](const std::string& name, RegstDesc* regst) { - regst->mut_data_regst_time_shape()->reset( - new Shape({GlobalJobDesc().TotalBatchNum(), static_cast(1)})); - }); -} - -} // namespace oneflow diff --git a/oneflow/core/graph/optimizer_compute_task_node.h b/oneflow/core/graph/optimizer_compute_task_node.h deleted file mode 100644 index 29eab2c2b7..0000000000 --- a/oneflow/core/graph/optimizer_compute_task_node.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#ifndef ONEFLOW_CORE_GRAPH_OPTIMIZER_COMPUTE_TASK_NODE_H_ -#define ONEFLOW_CORE_GRAPH_OPTIMIZER_COMPUTE_TASK_NODE_H_ - -#include "oneflow/core/graph/compute_task_node.h" - -namespace oneflow { - -class OptimizerCompTaskNode final : public CompTaskNode { - public: - OF_DISALLOW_COPY_AND_MOVE(OptimizerCompTaskNode); - OptimizerCompTaskNode() = default; - ~OptimizerCompTaskNode() = default; - - void ProduceAllRegstsAndBindEdges() override; - void ConsumeAllRegsts() override; - - TaskType GetTaskType() const override { return TaskType::kOptimizer; } - CudaWorkType GetCudaWorkType() const override { -#ifdef WITH_CUDA - return CudaWorkType::kCompute; -#else - UNIMPLEMENTED(); -#endif - } - - private: - void BuildExecGphAndRegst() override; - void InferProducedDataRegstTimeShape() override; -}; - -} // namespace oneflow - -#endif // ONEFLOW_CORE_GRAPH_OPTIMIZER_COMPUTE_TASK_NODE_H_ diff --git a/oneflow/core/job/task.proto b/oneflow/core/job/task.proto index ddcf05304e..01878daae2 100644 --- a/oneflow/core/job/task.proto +++ b/oneflow/core/job/task.proto @@ -19,7 +19,6 @@ enum TaskType { kUnpack = 32; kRepeat = 34; kAcc = 37; - kOptimizer = 38; kSourceTick = 40; kTick = 41; kAccTick = 42; diff --git a/oneflow/core/job_rewriter/optimizer.cpp b/oneflow/core/job_rewriter/optimizer.cpp index 9347065a10..356e40e3f1 100644 --- a/oneflow/core/job_rewriter/optimizer.cpp +++ b/oneflow/core/job_rewriter/optimizer.cpp @@ -81,32 +81,6 @@ float GetOptimizerWeightDecayRate(const NormalModelUpdateOpUserConf& model_updat } } -template -void ConstructMdUpdtOpConf(const VariableOp& op, const LogicalBlobId& diff_lbi_of_var_out, - JobBuilder* job_builder, T* mdupdt_op_conf) { - const auto& train_conf = job_builder->job().job_conf().train_conf(); - *mdupdt_op_conf->mutable_user_conf() = train_conf.model_update_conf(); - mdupdt_op_conf->set_model_diff(GenLogicalBlobName(diff_lbi_of_var_out)); - mdupdt_op_conf->set_model(GenLogicalBlobName(op.BnInOp2Lbi("out"))); - mdupdt_op_conf->set_train_step(train_conf.train_step_lbn()); - const std::string& primary_lr_lbn = train_conf.primary_lr_lbn(); - const std::string& secondary_lr_lbn = train_conf.secondary_lr_lbn(); - if (op.op_conf().variable_conf().model_name() == "weight") { - mdupdt_op_conf->set_learning_rate(primary_lr_lbn); - } else if (op.op_conf().variable_conf().model_name() == "bias") { - mdupdt_op_conf->set_learning_rate(secondary_lr_lbn); - } else { - mdupdt_op_conf->set_learning_rate(primary_lr_lbn); - } - const float weight_decay_rate = GetOptimizerWeightDecayRate(train_conf.model_update_conf(), op); - if (weight_decay_rate != 0) { mdupdt_op_conf->set_weight_decay(weight_decay_rate); } -} - -#define INSTANTIATE_CONSTRUCTOR_MDUPDT_OP_CONF(T) \ - template void ConstructMdUpdtOpConf(const VariableOp& op, \ - const LogicalBlobId& diff_lbi_of_var_out, \ - JobBuilder* job_builder, T* mdupdt_op_conf) - void SetDynamicLossScaleSkipIf(JobPassCtx* ctx, user_op::UserOpConfWrapperBuilder* builder) { if (!ctx->job_desc().job_conf().train_conf().has_dynamic_loss_scale_policy()) { return; } builder->Input("skip_if", diff --git a/oneflow/core/job_rewriter/optimizer.h b/oneflow/core/job_rewriter/optimizer.h index 422c39a63d..d2c125601d 100644 --- a/oneflow/core/job_rewriter/optimizer.h +++ b/oneflow/core/job_rewriter/optimizer.h @@ -31,10 +31,6 @@ float GetOptimizerWeightDecayRate(const NormalModelUpdateOpUserConf& model_updat void SetDynamicLossScaleSkipIf(JobPassCtx* ctx, user_op::UserOpConfWrapperBuilder* builder); -template -void ConstructMdUpdtOpConf(const VariableOp& op, const LogicalBlobId& diff_lbi_of_var_out, - JobBuilder* job_builder, T*); - class GenerateOptimizerOpConfWrapperStruct final { public: using Func = std::function -void NormalMdUpdateKernel::VirtualKernelInit() { - const PbMessage& op_conf = this->GetCustomizedOpConf(); - weight_decay_ = static_cast(GetValFromPbMessage(op_conf, "weight_decay")); - if (!IsWeightDecaySupported()) { CHECK_EQ(weight_decay_, static_cast(0)); } -} - -template -void NormalMdUpdateKernel::ForwardDataContent( - const KernelCtx& ctx, std::function BnInOp2Blob) const { - const int64_t* train_step_ptr = BnInOp2Blob("train_step")->dptr(); - const float* learning_rate_ptr = BnInOp2Blob("learning_rate")->dptr(); - UpdateModel(ctx.device_ctx, weight_decay_, train_step_ptr, learning_rate_ptr, BnInOp2Blob); -} - -#define INSTANTIATE_KERNEL(device_type, data_type_pair) \ - template class NormalMdUpdateKernel; -OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(INSTANTIATE_KERNEL, DEVICE_TYPE_SEQ, FLOATING_DATA_TYPE_SEQ) -#undef INSTANTIATE_KERNEL - -} // namespace oneflow diff --git a/oneflow/core/kernel/normal_model_update_kernel.h b/oneflow/core/kernel/normal_model_update_kernel.h deleted file mode 100644 index d6108418f3..0000000000 --- a/oneflow/core/kernel/normal_model_update_kernel.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#ifndef ONEFLOW_CORE_KERNEL_NORMAL_MODEL_UPDATE_KERNEL_H_ -#define ONEFLOW_CORE_KERNEL_NORMAL_MODEL_UPDATE_KERNEL_H_ - -#include "oneflow/core/framework/to_string.h" -#include "oneflow/core/kernel/kernel.h" - -namespace oneflow { - -template -class NormalMdUpdateKernel : public KernelIf { - public: - OF_DISALLOW_COPY_AND_MOVE(NormalMdUpdateKernel); - virtual ~NormalMdUpdateKernel() = default; - - protected: - NormalMdUpdateKernel() = default; - virtual void UpdateModel(DeviceCtx* ctx, T weight_decay, const int64_t* train_step, - const float* learning_rate, - std::function BnInOp2Blob) const = 0; - virtual bool IsWeightDecaySupported() { return false; } - - void Forward(const KernelCtx& ctx, - std::function BnInOp2Blob) const override { - ForwardDataContent(ctx, BnInOp2Blob); - } - - private: - void ForwardDataContent(const KernelCtx& ctx, - std::function BnInOp2Blob) const override; - - void VirtualKernelInit() override; - - T weight_decay_; -}; - -#define DECLARE_MDUPDT_KERNEL_CREATOR(x) Kernel* Create##x##MdUpdtKernel(const KernelConf&); - -#define DEFINE_MDUPDT_KERNEL_CREATOR(x) \ - Kernel* Create##x##MdUpdtKernel(const KernelConf& kernel_conf) { \ - static const HashMap> creators = { \ - OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (x##MdUpdateKernel), \ - DEVICE_TYPE_SEQ, FLOATING_DATA_TYPE_SEQ)}; \ - DeviceType device_type = \ - CHECK_JUST(DeviceType4DeviceTag(kernel_conf.op_attribute().op_conf().device_tag())); \ - return creators.at(GetHashKey(device_type, kernel_conf.data_type()))(); \ - } - -} // namespace oneflow - -#endif // ONEFLOW_CORE_KERNEL_NORMAL_MODEL_UPDATE_KERNEL_H_ diff --git a/oneflow/core/operator/normal_model_update_op.cpp b/oneflow/core/operator/normal_model_update_op.cpp deleted file mode 100644 index 34a903c4d7..0000000000 --- a/oneflow/core/operator/normal_model_update_op.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#include "oneflow/core/operator/normal_model_update_op.h" -#include "oneflow/core/job/sbp_signature_builder.h" - -namespace oneflow { - -void NormalModelUpdtOp::InitFromOpConf() { - EnrollInputBn("model_diff", false); - EnrollInputBn("model", false)->set_is_mutable(true); - EnrollInputBn("learning_rate", false); - EnrollInputBn("train_step", false); - MdUpdtVirtualInitFromOpConf(); -} - -Maybe NormalModelUpdtOp::InferBlobDescs( - std::function GetBlobDesc4BnInOp, - const ParallelContext* parallel_ctx) const { - return MdUpdtVirtualInferBlobDescs(GetBlobDesc4BnInOp, parallel_ctx); -} - -LogicalBlobId NormalModelUpdtOp::lbi4obn(const std::string& output_bn) const { - const google::protobuf::Descriptor* desc = GetCustomizedConf().GetDescriptor(); - const google::protobuf::FieldDescriptor* fd = desc->FindFieldByName(output_bn); - CHECK(fd); - return GenLogicalBlobId(GetValFromCustomizedConf(output_bn)); -} - -Maybe NormalModelUpdtOp::InferBatchAxis( - std::function BatchAxis4BnInOp) const { - return Maybe::Ok(); -} - -Maybe NormalModelUpdtOp::GetSbpSignatures( - const std::function(const std::string&)>& LogicalBlobDesc4Ibn, - SbpSignatureList* sbp_sig_list) const { - const auto& bns = AlwaysBroadcastParallelBns(); - PbRpf broadcast_bns = {bns.begin(), bns.end()}; - *broadcast_bns.Add() = "learning_rate"; - *broadcast_bns.Add() = "train_step"; - FOR_RANGE(int64_t, i, 0, JUST(LogicalBlobDesc4Ibn("model")).shape().NumAxes()) { - SbpSignatureBuilder() - .Split(input_bns(), i) - .Broadcast(broadcast_bns) - .Build(sbp_sig_list->mutable_sbp_signature()->Add()); - } - return Maybe::Ok(); -} - -REGISTER_OP_CREATOR(OperatorConf::kNormalMdupdtConf, ([](const OperatorConf& op_conf) -> Operator* { - return NewObj( - op_conf.normal_mdupdt_conf().user_conf().normal_mdupdt_case()); - })); - -} // namespace oneflow diff --git a/oneflow/core/operator/normal_model_update_op.h b/oneflow/core/operator/normal_model_update_op.h deleted file mode 100644 index 16500e85d3..0000000000 --- a/oneflow/core/operator/normal_model_update_op.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#ifndef ONEFLOW_CORE_OPERATOR_NORMAL_MODEL_UPDATE_OP_H_ -#define ONEFLOW_CORE_OPERATOR_NORMAL_MODEL_UPDATE_OP_H_ - -#include "oneflow/core/operator/operator.h" - -namespace oneflow { - -class NormalModelUpdtOp : public Operator { - public: - OF_DISALLOW_COPY_AND_MOVE(NormalModelUpdtOp); - virtual ~NormalModelUpdtOp() = default; - - void InitFromOpConf() override; - Maybe InferBlobDescs(std::function GetBlobDesc4BnInOp, - const ParallelContext* parallel_ctx) const override; - - protected: - NormalModelUpdtOp() = default; - virtual void MdUpdtVirtualInitFromOpConf() {} - virtual Maybe MdUpdtVirtualInferBlobDescs( - std::function GetBlobDesc4BnInOp, - const ParallelContext*) const { - return Maybe::Ok(); - } - - virtual const HashSet AlwaysBroadcastParallelBns() const = 0; - - private: - Maybe InferBatchAxis( - std::function BatchAxis4BnInOp) const override; - Maybe GetSbpSignatures( - const std::function(const std::string&)>& LogicalBlobDesc4Ibn, - SbpSignatureList* sbp_sig_list) const override; - - LogicalBlobId lbi4obn(const std::string& output_bn) const override; -}; - -} // namespace oneflow - -#endif // ONEFLOW_CORE_OPERATOR_NORMAL_MODEL_UPDATE_OP_H_ diff --git a/oneflow/core/operator/op_conf.proto b/oneflow/core/operator/op_conf.proto index babb751d00..de58534de0 100644 --- a/oneflow/core/operator/op_conf.proto +++ b/oneflow/core/operator/op_conf.proto @@ -351,15 +351,6 @@ message NormalModelUpdateOpUserConf { } } -message NormalModelUpdateOpConf { - required NormalModelUpdateOpUserConf user_conf = 1; - required string model_diff = 2; - required string model = 4; - required string train_step = 5; - required string learning_rate = 6; - optional float weight_decay = 7 [default = 0.0]; -} - message AccumulateOpConf { } @@ -888,7 +879,6 @@ message OperatorConf { CopyCommNetOpConf copy_comm_net_conf = 106; BoxingOpConf boxing_conf = 108; AccumulateOpConf accumulate_conf = 117; - NormalModelUpdateOpConf normal_mdupdt_conf = 118; VariableOpConf variable_conf = 122; TickOpConf tick_conf = 124; KeepHeaderOnlyOpConf keep_header_only_conf = 125; diff --git a/oneflow/xrt/launch_op.h b/oneflow/xrt/launch_op.h index 5d4a5e9081..834661e8e2 100644 --- a/oneflow/xrt/launch_op.h +++ b/oneflow/xrt/launch_op.h @@ -31,8 +31,6 @@ class XrtLaunchOp : public Operator { const ParallelContext* parallel_ctx) const override; LogicalNode* NewProperLogicalNode() const override { - const auto& launch_conf = op_conf().xrt_launch_conf(); - if (launch_conf.model_update()) { return new OptimizerLogicalNode; } return new NormalForwardLogicalNode; } -- GitLab