From 7fe0297e6451db4f18df81a8716414acca819529 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Fri, 29 Sep 2017 18:38:36 -0700 Subject: [PATCH] remove Runtime InferShape for cond op (#4518) * init remove cond_op infershape * optimize code * add PrepareDataForSubnet and MergeDataFromSubnet --- paddle/operators/cond_op.cc | 242 +++++++++--------- paddle/operators/cond_op.h | 33 +-- paddle/operators/scatter.h | 4 - .../paddle/v2/framework/tests/test_cond_op.py | 3 - 4 files changed, 140 insertions(+), 142 deletions(-) diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc index aaffa6661fe..db20b69f3fa 100644 --- a/paddle/operators/cond_op.cc +++ b/paddle/operators/cond_op.cc @@ -14,12 +14,7 @@ limitations under the License. */ #include "paddle/operators/cond_op.h" -#include -#include - -#include "paddle/framework/op_registry.h" #include "paddle/operators/gather.h" -#include "paddle/operators/net_op.h" #include "paddle/operators/scatter.h" namespace paddle { @@ -31,142 +26,104 @@ using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; using DDim = framework::DDim; -void CondOp::CreateScope(const Scope& scope) const { +framework::Scope& CondOp::AddSubScope(const Scope& scope) const { auto sub_scopes_var = scope.FindVar("SubScopes"); PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, "Output(SubScopes) of CondOp should not be null."); auto sub_scopes = sub_scopes_var->GetMutable>(); auto& sub_scope = scope.NewScope(); sub_scopes->push_back(&sub_scope); + return sub_scope; } -void CondOp::CreateIndexTensor(const Scope& scope) const { +std::vector& CondOp::GetSubScopes( + const framework::Scope& scope) const { + auto sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + return *sub_scopes_var->GetMutable>(); +} + +LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const { auto index_tensors_var = scope.FindVar("IndexTensors"); PADDLE_ENFORCE_NOT_NULL(index_tensors_var, "Output(IndexTensors) of CondOp should not be null."); auto& index_tensors = *index_tensors_var->GetMutable>(); index_tensors.push_back(LoDTensor()); + return index_tensors.back(); } -void CondOp::InferShape(const Scope& scope) const { - auto sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - auto& sub_scopes = *sub_scopes_var->GetMutable>(); - - for (int i = 0; i < 2; ++i) { - // Create two sub scopes for true and false branches - // sub_scopes[0] for the true branch and sub_scopes[1] for the false - // branch - CreateScope(scope); - - // Create two tensors for true and false indices - // index_tensors[0] for the true branch and index_tensors[1] for the false - // branch - CreateIndexTensor(scope); - - PADDLE_ENFORCE(!Inputs("Xs").empty(), - "Inputs(Xs) of CondOp can't be empty."); - for (auto& input : Inputs("Xs")) { - // Create a new tensor in sub-scope for input-type tensor - Variable* v = sub_scopes[i]->NewVar(input); - LoDTensor* sub_input = v->GetMutable(); - sub_input->Resize(scope.FindVar(input)->GetMutable()->dims()); - } - - for (auto& output : (*sub_net_op_[i]).Outputs()) { - for (auto& var_name : output.second) { - sub_scopes[i]->NewVar(var_name); - } - } - - // each net calls InferShape - // sub_net_op_[i]->InferShape(*sub_scopes[i]); - } - - for (auto& output : Outputs("Outs")) { - LoDTensor* tensor_t_out = - sub_scopes[0]->FindVar(output)->GetMutable(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL"); - LoDTensor* tensor_f_out = - sub_scopes[1]->FindVar(output)->GetMutable(); - PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); - - auto* tensor_out_var = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(tensor_out_var, "Output not found"); - LoDTensor* tensor_out = tensor_out_var->GetMutable(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, - "True output tensor should not be NULL"); - - // check output size should be same - PADDLE_ENFORCE_EQ(tensor_t_out->dims(), tensor_f_out->dims(), - "Outputs not of the same shape"); - tensor_out->Resize(tensor_t_out->dims()); - // tensor_out->mutable_data(tensor_out->dims(), - // platform::CPUPlace()); - tensor_out->mutable_data(platform::CPUPlace()); - } -} - -void CondOp::Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const { - auto* sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - auto sub_scopes = sub_scopes_var->Get>(); +std::vector& CondOp::GetIndexTensors( + const framework::Scope& scope) const { auto* index_tensors_var = scope.FindVar("IndexTensors"); PADDLE_ENFORCE_NOT_NULL(index_tensors_var, "Output(IndexTensors) of CondOp should not be null."); - auto index_tensors = index_tensors_var->Get>(); + return *index_tensors_var->GetMutable>(); +} - std::string cond_name = Input("Cond"); - Variable* cond_var = scope.FindVar(cond_name); +void CondOp::PrepareDataForSubnet( + const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const { + PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty."); + + for (int i = 0; i < BRANCH_NUM; ++i) { + // Create two sub scopes for true and false branches + // sub_scopes[0] for the true branch + // sub_scopes[1] for the false branch + AddSubScope(scope); + // Create two tensors for true and false indices: + // index_tensors[0] for the true branch + // index_tensors[1] for the false branch + AddIndexTensor(scope); + } + + Variable* cond_var = scope.FindVar(Input("Cond")); PADDLE_ENFORCE_NOT_NULL(cond_var, "Input(Cond) of CondOp should not be null."); const LoDTensor* cond = cond_var->GetMutable(); - // Step 1: get the true/false index at runtime - // index_[0]: vector, contains all index for cond[i] == true - // index_[1]: vector, contains all index for cond[i] == false - for (int i = 0; i < 2; ++i) index_[i].clear(); + // get the true/false index at runtime according to cond tensor + // index_vectors[0]: vector, contains all index for cond[i] == true + // index_vectors[1]: vector, contains all index for cond[i] == false + std::vector> index_vectors; + index_vectors.resize(BRANCH_NUM); const int* cond_data = cond->data(); for (int i = 0; i < cond->dims()[0]; ++i) { if (cond_data[i]) - index_[0].push_back(i); + index_vectors[TRUE_BRANCH].push_back(i); else - index_[1].push_back(i); + index_vectors[FALSE_BRANCH].push_back(i); } - // put index_[0] and index_[1] into two tensors: - // index_tensor_[0] and index_tensor_[1] - DDim dim = paddle::framework::make_ddim({0}); - for (int i = 0; i < 2; ++i) { - dim[0] = index_[i].size(); - int* tmp_ptr = + // put index_vectors[0] and index_vectors[1] into two tensors: + // index_tensors[0] and index_tensors[1] + std::vector& index_tensors = GetIndexTensors(scope); + std::vector& sub_scopes = GetSubScopes(scope); + + for (int i = 0; i < BRANCH_NUM; ++i) { + DDim dim = {static_cast(index_vectors[i].size())}; + int* index_tensor_data_ptr = index_tensors[i].mutable_data(dim, platform::CPUPlace()); - index_tensors[i].Resize(dim); - memcpy(tmp_ptr, index_[i].data(), dim[0] * sizeof(int)); + memcpy(index_tensor_data_ptr, index_vectors[i].data(), + dim[0] * sizeof(int)); } - // Step 2: collect data by calling gather - for (int i = 0; i < 2; ++i) { - // i= 0/i for True and False branches respectively - for (auto& input : Inputs("Xs")) { - // find Tensor - Variable* v = scope.FindVar(input); - PADDLE_ENFORCE_NOT_NULL(v); - LoDTensor* tensor_parent = v->GetMutable(); + // create input in subscopes according to index_vectors + for (auto& input : Inputs("Xs")) { + Variable* var_parent = scope.FindVar(input); + PADDLE_ENFORCE_NOT_NULL(var_parent); + const auto* tensor_parent = &var_parent->Get(); - v = sub_scopes[i]->FindVar(input); - PADDLE_ENFORCE_NOT_NULL(v); - LoDTensor* tensor_child = v->GetMutable(); + for (int i = 0; i < BRANCH_NUM; ++i) { + Variable* var_child = sub_scopes[i]->FindVar(input); + PADDLE_ENFORCE_NOT_NULL(var_child); + auto* tensor_child = var_child->GetMutable(); // Resize child - DDim dim = tensor_child->dims(); - dim[0] = index_[i].size(); - tensor_child->Resize(dim); + DDim dim = tensor_parent->dims(); + dim[0] = index_tensors[i].dims()[0]; tensor_child->mutable_data(dim, platform::CPUPlace()); Gather(dev_ctx.GetPlace(), tensor_parent, &index_tensors[i], @@ -174,32 +131,79 @@ void CondOp::Run(const Scope& scope, } } - // Step 3: run - for (int i = 0; i < 2; ++i) { - sub_net_op_[i]->Run(*sub_scopes[i], dev_ctx); + // create output_tensors in subscope for sub_net + for (int i = 0; i < BRANCH_NUM; ++i) { + for (auto& output : (*sub_net_op_[i]).Outputs()) { + for (auto& var_name : output.second) { + sub_scopes[i]->NewVar(var_name); + } + } } +} - // Step 4: merge output results +void CondOp::MergeDataFromSubnet(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const { + std::vector& sub_scopes = GetSubScopes(scope); + const std::vector& index_tensors = + GetIndexTensors(scope); + + // Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0] PADDLE_ENFORCE(!Outputs("Outs").empty(), "Outputs(Outs) of CondOp can't be empty."); - for (int i = 0; i < 2; ++i) { - // i= 0/i for True and False branches respectively - for (auto& output : Outputs("Outs")) { - // find Tensor - Variable* v = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(v); - LoDTensor* tensor_parent = v->GetMutable(); - - v = sub_scopes[i]->FindVar(output); - PADDLE_ENFORCE_NOT_NULL(v); - LoDTensor* tensor_child = v->GetMutable(); + for (auto& output : Outputs("Outs")) { + const LoDTensor* tensor_t_out = + &sub_scopes[TRUE_BRANCH]->FindVar(output)->Get(); + PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL"); + const LoDTensor* tensor_f_out = + &sub_scopes[FALSE_BRANCH]->FindVar(output)->Get(); + PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); + + auto* var_out = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found"); + LoDTensor* tensor_out = var_out->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_t_out, + "True output tensor should not be NULL"); + + DDim true_dim = tensor_t_out->dims(); + DDim false_dim = tensor_f_out->dims(); + true_dim[0] = 0; + false_dim[0] = 0; + PADDLE_ENFORCE_EQ(true_dim, false_dim, + "Outputs not of the same shape except the first dim"); + + DDim out_dim = tensor_t_out->dims(); + out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0]; + tensor_out->Resize(out_dim); + tensor_out->mutable_data(platform::CPUPlace()); + } + // merge output results: + // output_tensor = true_output_tensor + false_output_tensor + for (auto& output : Outputs("Outs")) { + Variable* var_parent = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(var_parent); + auto* tensor_parent = var_parent->GetMutable(); + + for (int i = 0; i < BRANCH_NUM; ++i) { + Variable* var_child = sub_scopes[i]->FindVar(output); + PADDLE_ENFORCE_NOT_NULL(var_child); + auto* tensor_child = &var_child->Get(); ScatterUpdate(dev_ctx.GetPlace(), tensor_child, &index_tensors[i], tensor_parent); } } } +void CondOp::Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const { + PrepareDataForSubnet(scope, dev_ctx); + std::vector& sub_scopes = GetSubScopes(scope); + for (int i = 0; i < BRANCH_NUM; ++i) { + sub_net_op_[i]->Run(*sub_scopes[i], dev_ctx); + } + MergeDataFromSubnet(scope, dev_ctx); +} + class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { public: CondOpProtoAndCheckerMaker(framework::OpProto* proto, diff --git a/paddle/operators/cond_op.h b/paddle/operators/cond_op.h index 9a88ee35f10..93121fb31be 100644 --- a/paddle/operators/cond_op.h +++ b/paddle/operators/cond_op.h @@ -40,8 +40,7 @@ class CondOp : public framework::OperatorBase { const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { - index_.resize(2); - sub_net_op_.resize(2); + sub_net_op_.resize(BRANCH_NUM); } CondOp(const CondOp& o) @@ -51,42 +50,44 @@ class CondOp : public framework::OperatorBase { PADDLE_THROW("Not implemented"); } - void CreateScope(const framework::Scope& scope) const; + framework::Scope& AddSubScope(const framework::Scope& scope) const; + std::vector& GetSubScopes( + const framework::Scope& scope) const; - void CreateIndexTensor(const framework::Scope& scope) const; + framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const; + std::vector& GetIndexTensors( + const framework::Scope& scope) const; - /* - * InferShape must be called before Run. - * FIXME(yuyang18): Since InferShape has been removed, this implementation - * could be wrong. - */ - void InferShape(const framework::Scope& scope) const; + void PrepareDataForSubnet(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const; + void MergeDataFromSubnet(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const; /* * Set True Block */ void set_truenet(std::unique_ptr&& net) { - sub_net_op_[0] = std::move(net); + sub_net_op_[TRUE_BRANCH] = std::move(net); } /* * Set False Block */ void set_falsenet(std::unique_ptr&& net) { - sub_net_op_[1] = std::move(net); + sub_net_op_[FALSE_BRANCH] = std::move(net); } void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const override; private: + const int TRUE_BRANCH = 0; + const int FALSE_BRANCH = 1; + const int BRANCH_NUM = 2; + // sub_net_op_[0]: subnet_t // sub_net_op_[1]: subnet_f std::vector> sub_net_op_; - - // index_[0]: True_index; - // index_[1]: False_index; - mutable std::vector> index_; }; } // namespace operators diff --git a/paddle/operators/scatter.h b/paddle/operators/scatter.h index 6b542675c29..fec1046a974 100644 --- a/paddle/operators/scatter.h +++ b/paddle/operators/scatter.h @@ -78,10 +78,6 @@ void ScatterUpdate(const platform::Place& place, for (int i = 1; i < src_dims.size(); i++) PADDLE_ENFORCE(src_dims[i] == dst_dims[i]); - // slice size - size_t slice_size = 1; - for (int i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; - if (platform::is_cpu_place(place)) { CPUScatterUpdate(src, index->data(), index_size, output); } else { diff --git a/python/paddle/v2/framework/tests/test_cond_op.py b/python/paddle/v2/framework/tests/test_cond_op.py index e7a506f2775..3698ce9c8ed 100644 --- a/python/paddle/v2/framework/tests/test_cond_op.py +++ b/python/paddle/v2/framework/tests/test_cond_op.py @@ -112,7 +112,4 @@ class TestCondOp(unittest.TestCase): if __name__ == "__main__": - exit( - 0 - ) # FIXME(yuyang18): Since infer_shape has been removed, cond op may error unittest.main() -- GitLab