From a5021e894290aca676542094c8a0edcaa52476a5 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 14 Sep 2022 13:04:25 +0800 Subject: [PATCH] add check_memory_continue kernel (#45999) --- .../kernels/check_memory_continue_kernel.cc | 101 ++++++++++++++++++ .../kernels/check_memory_continue_kernel.h | 40 +++++++ 2 files changed, 141 insertions(+) create mode 100644 paddle/phi/kernels/check_memory_continue_kernel.cc create mode 100644 paddle/phi/kernels/check_memory_continue_kernel.h diff --git a/paddle/phi/kernels/check_memory_continue_kernel.cc b/paddle/phi/kernels/check_memory_continue_kernel.cc new file mode 100644 index 0000000000..2fafb5b9ef --- /dev/null +++ b/paddle/phi/kernels/check_memory_continue_kernel.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/phi/kernels/check_memory_continue_kernel.h"
+
+#include <sstream>
+#include <vector>
+
+#include "glog/logging.h"
+
+#include "paddle/phi/core/kernel_registry.h"
+
+#include "paddle/fluid/platform/device_memory_aligment.h"
+
+namespace phi {
+
+// Verifies that the input tensors occupy one contiguous chunk of memory:
+// each tensor must start exactly at the (alignment-padded) end of its
+// predecessor. On success, `output` is made to share the buffer of the
+// first input and is resized to cover the whole fused region, so callers
+// (e.g. a following c_allreduce_sum) can treat the list as one tensor.
+//
+// Template parameters:
+//   T       - element type; only sizeof(T) enters the offset arithmetic.
+//   Context - device context (CPU/GPU); used for GetPlace() when computing
+//             per-device alignment.
+//
+// Parameters:
+//   input  - tensors expected to be contiguous; all must share one dtype.
+//   output - inplace view over the fused buffer (shares input[0]'s holder).
+//   xout   - kept for operator-signature compatibility; not used here.
+//
+// Raises (via PADDLE_ENFORCE_*) when the list is empty, dtypes differ, or
+// a tensor does not start at the address inferred from its predecessor.
+template <typename T, typename Context>
+void CheckMemoryContinueKernel(const Context &dev_ctx,
+                               const std::vector<const DenseTensor *> &input,
+                               DenseTensor *output,
+                               std::vector<DenseTensor *> xout) {
+  int64_t size_of_dtype = sizeof(T);
+  // Fail with a clear message instead of std::out_of_range on empty input.
+  PADDLE_ENFORCE_GT(
+      input.size(),
+      static_cast<size_t>(0),
+      errors::InvalidArgument(
+          "The input tensor list of check_memory_continue should not be "
+          "empty."));
+  auto dtype = input.at(0)->dtype();
+  int64_t numel = 0;
+  // Check addresses pairwise: tensor i must begin at the aligned address
+  // right after tensor i-1.
+  for (size_t i = 1; i < input.size(); ++i) {
+    PADDLE_ENFORCE_EQ(
+        dtype,
+        input.at(i)->dtype(),
+        errors::InvalidArgument(
+            "The DataType of input tensors of fake_coalesce should be "
+            "consistent, current dtype is: %s, but the previous dtype is %s",
+            dtype,
+            input.at(i)->dtype()));
+    const void *cur_address = input.at(i - 1)->data();
+    int64_t len = input.at(i - 1)->numel();
+    // Bytes occupied by tensor i-1, padded to the device's alignment.
+    auto offset =
+        paddle::platform::Alignment(len * size_of_dtype, dev_ctx.GetPlace());
+    void *infer_next_address = reinterpret_cast<void *>(
+        reinterpret_cast<uintptr_t>(cur_address) + offset);
+    const void *next_address = input.at(i)->data();
+    numel += offset;
+
+    VLOG(10) << ::paddle::string::Sprintf(
+        "Input[%d] address: 0X%02x, Input[%d] address: 0X%02x, Infer "
+        "input[%d] address: 0X%02x, offset: %d.",
+        i - 1,
+        cur_address,
+        i,
+        next_address,
+        i,
+        infer_next_address,
+        offset);
+    PADDLE_ENFORCE_EQ(
+        infer_next_address,
+        next_address,
+        errors::InvalidArgument(
+            "The infered address of the next tensor should be equal to the "
+            "real address of the next tensor. But got infered address is %p "
+            "and real address is %p.",
+            infer_next_address,
+            next_address));
+  }
+  // The last tensor's (aligned) size is not covered by the loop above.
+  numel += paddle::platform::Alignment(
+      (*input.rbegin())->numel() * size_of_dtype, dev_ctx.GetPlace());
+  // Reset holder, do inplace: output aliases input[0]'s buffer and spans
+  // the whole fused region (numel is in bytes here, hence the division).
+  output->ShareBufferWith(*input.at(0));
+  output->Resize({numel / size_of_dtype});
+  VLOG(4) << "addr:" << output->data();
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(check_memory_continue,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::CheckMemoryContinueKernel,
+                   int,
+                   float,
+                   double) {}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_REGISTER_KERNEL(check_memory_continue,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::CheckMemoryContinueKernel,
+                   phi::dtype::float16,
+                   int,
+                   float,
+                   double) {}
+#endif
diff --git a/paddle/phi/kernels/check_memory_continue_kernel.h b/paddle/phi/kernels/check_memory_continue_kernel.h
new file mode 100644
index 0000000000..6b77b25992
--- /dev/null
+++ b/paddle/phi/kernels/check_memory_continue_kernel.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+// WHY add this op?
+// This op is used for convert fused_all_reduce_op_handle in Graph to Program.
+// i.e, fused_all_reduce_op_handle = check_memory_continue + c_allreduce_sum
+// There are two reasons that check_memory_continue is added:
+// 1. c_allreduce_sum takes a single tensor as input, while
+// fused_all_reduce_op_handle takes a tensor array as input, so we need an op
+// to convert the tensor array into a single tensor
+// 2. fused_all_reduce_op_handle has a premise that all tensors' addresses are
+// contiguous, so we need an op to do the check.
+// see details in fused_all_reduce_op_handle.cc
+template <typename T, typename Context>
+void CheckMemoryContinueKernel(const Context &dev_ctx,
+                               const std::vector<const DenseTensor *> &input,
+                               DenseTensor *output,
+                               std::vector<DenseTensor *> xout);
+
+}  // namespace phi
-- 
GitLab