From 5943ff7ba319a9b609020d51b0ee5301635cbc80 Mon Sep 17 00:00:00 2001
From: Baibaifan <39549453+Baibaifan@users.noreply.github.com>
Date: Sun, 25 Apr 2021 15:59:36 +0800
Subject: [PATCH] add copy_cross_scope (#32432)

---
 paddle/fluid/operators/CMakeLists.txt         |   4 +
 paddle/fluid/operators/copy_cross_scope_op.cc | 151 +++++++++++++++++
 .../fluid/operators/copy_cross_scope_test.cc  | 154 ++++++++++++++++++
 python/paddle/fluid/framework.py              |   3 +-
 4 files changed, 311 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/operators/copy_cross_scope_op.cc
 create mode 100644 paddle/fluid/operators/copy_cross_scope_test.cc

diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 60fa8e319d9..6e11c64afc4 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -199,3 +199,7 @@ endif()
 if(WITH_ASCEND_CL)
   cc_test(gelu_op_npu_test SRCS gelu_op_npu_test.cc DEPS op_registry gelu_op scope device_context enforce executor)
 endif()
+
+if(WITH_GPU OR WITH_ASCEND_CL)
+  cc_test(copy_cross_scope_test SRCS copy_cross_scope_test.cc DEPS op_registry copy_cross_scope_op scope device_context enforce executor)
+endif()
diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc
new file mode 100644
index 00000000000..721354954c7
--- /dev/null
+++ b/paddle/fluid/operators/copy_cross_scope_op.cc
@@ -0,0 +1,151 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iterator>
+#include <string>
+
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/var_type_traits.h"
+
+namespace paddle {
+namespace framework {
+class OpDesc;
+template <typename T>
+class EmptyGradOpMaker;
+}  // namespace framework
+namespace imperative {
+class OpBase;
+}  // namespace imperative
+}  // namespace paddle
+
+using LoDTensor = paddle::framework::LoDTensor;
+using Tensor = paddle::framework::Tensor;
+
+namespace paddle {
+namespace operators {
+
+class CopyCrossScopeOp : public framework::OperatorBase {
+ public:
+  CopyCrossScopeOp(const std::string& type,
+                   const framework::VariableNameMap& inputs,
+                   const framework::VariableNameMap& outputs,
+                   const framework::AttributeMap& attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext* ctx) const {}
+
+ private:
+  void RunImpl(const framework::Scope& scope,
+               const platform::Place& dev_place) const override {
+    int num_micro_scopes = scope.kids().size();
+    int num_micro_batches = Attr<int>("num_micro_batches");
+    bool ToM = Attr<bool>("to_main_scope");
+    PADDLE_ENFORCE_EQ(num_micro_scopes, num_micro_batches,
+                      platform::errors::InvalidArgument(
+                          "For pipeline, number of micro scopes (%d) should "
+                          "be equal to number of micro batches (%d).",
+                          num_micro_scopes, num_micro_batches));
+    const std::string& id_name = Input("Id");
+    auto* id_var = scope.FindVar(id_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        id_var, platform::errors::NotFound("No variable with name %s found.",
+                                           id_name));
+    auto id_tensor = id_var->GetMutable<LoDTensor>();
+    auto it = scope.kids().begin();
+    framework::Tensor cpu_id_tensor;
+    TensorCopySync(*id_tensor, platform::CPUPlace(), &cpu_id_tensor);
+    auto id_value = cpu_id_tensor.data<int64_t>();
+    // Advance to the micro scope selected by Id.
+    for (auto i = 0; i < *id_value; i++) {
+      it++;
+    }
+    // The last micro scope has no successor to copy to; if requested,
+    // copy its value back to the main scope instead.
+    if (std::next(it) == scope.kids().end()) {
+      if (ToM) {
+        auto dst_scope = *it;
+        const std::string& x_name = Input("X");
+        auto* dst_var = dst_scope->FindVar(x_name);
+        PADDLE_ENFORCE_NOT_NULL(
+            dst_var, platform::errors::NotFound(
+                         "No variable with name %s found in the last micro "
+                         "scope.",
+                         x_name));
+        auto* main_var = scope.FindVar(x_name);
+        PADDLE_ENFORCE_NOT_NULL(
+            main_var, platform::errors::NotFound(
+                          "No variable with name %s found in the main scope.",
+                          x_name));
+        auto dst_tensor = dst_var->GetMutable<LoDTensor>();
+        auto main_tensor = main_var->GetMutable<LoDTensor>();
+        TensorCopySync(*dst_tensor, main_tensor->place(), main_tensor);
+      }
+      return;
+    }
+    // Copy X from micro scope Id to micro scope Id + 1.
+    auto source_scope = *it;
+    it++;
+    auto dst_scope = *it;
+    const std::string& x_name = Input("X");
+    auto* source_var = source_scope->FindVar(x_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        source_var, platform::errors::NotFound(
+                        "No variable with name %s found in source scope.",
+                        x_name));
+    auto* dst_var = dst_scope->FindVar(x_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        dst_var, platform::errors::NotFound(
+                     "No variable with name %s found in destination scope.",
+                     x_name));
+    auto src_tensor = source_var->GetMutable<LoDTensor>();
+    auto dst_tensor = dst_var->GetMutable<LoDTensor>();
+    TensorCopySync(*src_tensor, dst_tensor->place(), dst_tensor);
+
+    if (ToM) {
+      auto* main_var = scope.FindVar(x_name);
+      PADDLE_ENFORCE_NOT_NULL(
+          main_var, platform::errors::NotFound(
+                        "No variable with name %s found in the main scope.",
+                        x_name));
+      auto main_tensor = main_var->GetMutable<LoDTensor>();
+      TensorCopySync(*dst_tensor, main_tensor->place(), main_tensor);
+    }
+  }
+};
+
+class CopyCrossScopeOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor), The first input tensor of copy_cross_scope op, the "
+             "tensor to be copied across micro scopes.");
+    AddInput("Id",
+             "(Tensor), The second input tensor of copy_cross_scope op, the "
+             "id of the current micro scope.");
+    AddAttr<bool>("to_main_scope",
+                  "Copy the variable of the current micro scope back to the "
+                  "main scope.")
+        .SetDefault(false);
+    AddAttr<int>("num_micro_batches",
+                 "Number of micro batches for pipeline.");
+    AddComment(R"DOC(
+      This op is used by pipeline to copy tensors across micro-batch scopes.
+      It copies the variable value of the micro scope selected by Id to the
+      micro scope at position Id + 1. If the value needs to be copied back to
+      the main scope, use the to_main_scope option, which copies the variable
+      value of the current micro scope to the main scope.
+    )DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_WITHOUT_GRADIENT(copy_cross_scope, ops::CopyCrossScopeOp,
+                             ops::CopyCrossScopeOpMaker);
diff --git a/paddle/fluid/operators/copy_cross_scope_test.cc b/paddle/fluid/operators/copy_cross_scope_test.cc
new file mode 100644
index 00000000000..e175b235f9c
--- /dev/null
+++ b/paddle/fluid/operators/copy_cross_scope_test.cc
@@ -0,0 +1,154 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <list>
+#include <map>
+#include <string>
+#include <thread>  // NOLINT
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/copy_cross_scope_op.cc"
+#include "paddle/fluid/string/printf.h"
+
+#define Conn(x, y) x##y
+
+namespace f = paddle::framework;
+namespace p = paddle::platform;
+
+USE_NO_KERNEL_OP(copy_cross_scope);
+
+template <typename T>
+void Compare1(f::Scope* scope, const p::DeviceContext& ctx,
+              std::string op_type) {
+  // init: put "tmp" and "Id" in the main scope.
+  auto var_x = scope->Var("tmp");
+  auto x = var_x->GetMutable<f::LoDTensor>();
+  std::vector<T> main_x = {1.0};
+  TensorFromVector(main_x, ctx, x);
+
+  auto var_id = scope->Var("Id");
+  auto id = var_id->GetMutable<f::LoDTensor>();
+  std::vector<int64_t> main_id = {1};
+  TensorFromVector(main_id, ctx, id);
+  // Create three micro scopes, each with its own "tmp" holding i.
+  for (int i = 0; i < 3; i++) {
+    auto& child_scope = scope->NewScope();
+    auto child_var = child_scope.Var("tmp");
+    auto tensor_x = child_var->GetMutable<f::LoDTensor>();
+    std::vector<T> init_x = {static_cast<T>(i)};
+    TensorFromVector(init_x, ctx, tensor_x);
+  }
+
+  ctx.Wait();
+
+  // run
+  f::AttributeMap attrs = {{"to_main_scope", false}, {"num_micro_batches", 3}};
+  std::map<std::string, std::vector<std::string>> output;
+  auto op = f::OpRegistry::CreateOp(op_type, {{"X", {"tmp"}}, {"Id", {"Id"}}},
+                                    output, attrs);
+
+  auto place = ctx.GetPlace();
+  op->Run(*scope, place);
+  ctx.Wait();
+
+  // Id is 1, so the value of micro scope 1 should now be in micro scope 2.
+  std::list<f::Scope*>::const_iterator iter = scope->kids().begin();
+  iter++;
+  iter++;
+
+  auto* kid_scope = *iter;
+  auto* dst_var = kid_scope->FindVar("tmp");
+  auto* tensor_out = dst_var->GetMutable<f::LoDTensor>();
+
+  std::vector<T> out_vec;
+  TensorToVector(*tensor_out, ctx, &out_vec);
+
+  int expected = 1;
+  EXPECT_EQ(static_cast<int>(out_vec[0]), expected);
+}
+
+template <typename T>
+void Compare2(f::Scope* scope, const p::DeviceContext& ctx,
+              std::string op_type) {
+  // init: put "tmp" and "Id" in the main scope.
+  auto var_x = scope->Var("tmp");
+  auto x = var_x->GetMutable<f::LoDTensor>();
+  std::vector<T> main_x = {1.0};
+  TensorFromVector(main_x, ctx, x);
+
+  auto var_id = scope->Var("Id");
+  auto id = var_id->GetMutable<f::LoDTensor>();
+  std::vector<int64_t> main_id = {0};
+  TensorFromVector(main_id, ctx, id);
+  for (int i = 0; i < 3; i++) {
+    auto& child_scope = scope->NewScope();
+    auto child_var = child_scope.Var("tmp");
+    auto tensor_x = child_var->GetMutable<f::LoDTensor>();
+    std::vector<T> init_x = {static_cast<T>(i)};
+    TensorFromVector(init_x, ctx, tensor_x);
+  }
+
+  ctx.Wait();
+
+  // run
+  f::AttributeMap attrs = {{"to_main_scope", true}, {"num_micro_batches", 3}};
+  std::map<std::string, std::vector<std::string>> output;
+  auto op = f::OpRegistry::CreateOp(op_type, {{"X", {"tmp"}}, {"Id", {"Id"}}},
+                                    output, attrs);
+
+  auto place = ctx.GetPlace();
+  op->Run(*scope, place);
+  ctx.Wait();
+
+  // With to_main_scope set, the copied value ends up in the main scope.
+  auto* dst_var = scope->FindVar("tmp");
+  auto* tensor_out = dst_var->GetMutable<f::LoDTensor>();
+
+  std::vector<T> out_vec;
+  TensorToVector(*tensor_out, ctx, &out_vec);
+
+  int expected = 0;
+  EXPECT_EQ(static_cast<int>(out_vec[0]), expected);
+}
+
+#ifdef PADDLE_WITH_CUDA
+TEST(copy_cross_scope, CUDA_fp32) {
+  f::Scope scope;
+  p::CUDADeviceContext ctx(p::CUDAPlace(0));
+  Compare1<float>(&scope, ctx, "copy_cross_scope");
+}
+
+TEST(copy_cross_scope_to_main_scope, CUDA_fp32) {
+  f::Scope scope;
+  p::CUDADeviceContext ctx(p::CUDAPlace(0));
+  Compare2<float>(&scope, ctx, "copy_cross_scope");
+}
+#elif defined(PADDLE_WITH_ASCEND_CL)
+TEST(copy_cross_scope, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  Compare1<float>(&scope, ctx, "copy_cross_scope");
+}
+
+TEST(copy_cross_scope_to_main_scope, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  Compare2<float>(&scope, ctx, "copy_cross_scope");
+}
+#endif
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 55e67327ec4..ccfec944a79 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -2254,7 +2254,8 @@ class Operator(object):
         'gen_bkcl_id', 'c_gen_bkcl_id', 'gen_nccl_id', 'c_gen_nccl_id',
         'c_comm_init', 'c_sync_calc_stream', 'c_sync_comm_stream',
         'queue_generator', 'dequeue', 'enqueue', 'heter_listen_and_serv',
-        'c_wait_comm', 'c_wait_compute', 'c_gen_hccl_id', 'c_comm_init_hccl'
+        'c_wait_comm', 'c_wait_compute', 'c_gen_hccl_id', 'c_comm_init_hccl',
+        'copy_cross_scope'
     }
 
     def __init__(self,
-- 
GitLab
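
Reviewer note: the framework.py hunk adds the op to OP_WITHOUT_KERNEL_SET because CopyCrossScopeOp is a plain OperatorBase registered with REGISTER_OP_WITHOUT_GRADIENT and has no compute kernel, so kernel lookup must be skipped for it. Below is a minimal sketch of how a pipeline driver could invoke the op once per micro batch, mirroring the unit-test setup above. It is illustrative only: the helper name CopyAcrossMicroScopes and the variable names "tmp" and "Id" are assumptions, not part of the op's contract; the op only requires that X exist in every micro scope and Id in the main scope.

#include <map>
#include <string>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"

namespace f = paddle::framework;
namespace p = paddle::platform;

USE_NO_KERNEL_OP(copy_cross_scope);

// Writes the current micro-batch index into "Id" in the main scope, then
// runs copy_cross_scope to move "tmp" from micro scope Id to micro scope
// Id + 1 (or back to the main scope for the last micro batch when
// to_main_scope is set).
void CopyAcrossMicroScopes(f::Scope* main_scope, const p::DeviceContext& ctx,
                           int64_t micro_batch_id, int num_micro_batches,
                           bool to_main_scope) {
  auto* id_tensor = main_scope->Var("Id")->GetMutable<f::LoDTensor>();
  std::vector<int64_t> id_value = {micro_batch_id};
  f::TensorFromVector(id_value, ctx, id_tensor);
  ctx.Wait();

  f::AttributeMap attrs = {{"to_main_scope", to_main_scope},
                           {"num_micro_batches", num_micro_batches}};
  // The op declares no outputs, so the output map stays empty.
  std::map<std::string, std::vector<std::string>> outputs;
  auto op = f::OpRegistry::CreateOp(
      "copy_cross_scope", {{"X", {"tmp"}}, {"Id", {"Id"}}}, outputs, attrs);
  op->Run(*main_scope, ctx.GetPlace());
  ctx.Wait();
}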