Unverified commit c2e77ba3 authored by Leo Chen, committed by GitHub

move share_buffer kernel to phi (#48858)

* move share_buffer kernel to phi

* fix ut

* add source file

* fix Windows links
Parent 8f1e24d5
@@ -81,6 +81,7 @@ yaml_types_mapping = {
'str': 'std::string',
'str[]': 'std::vector<std::string>',
'float[]': 'std::vector<float>',
'bool[]': 'std::vector<bool>',
'Place': 'paddle::Place',
'DataLayout': 'phi::DataLayout',
'DataType': 'paddle::experimental::DataType',
......
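For context, this table is what the eager code generator uses to translate yaml attribute types into C++ types, so the new entry lets an op declare a bool[] attribute and have it emitted as std::vector<bool> in generated code. A minimal sketch of the effect, borrowing the attribute name from the share_buffer yaml entry later in this diff:

// yaml attribute:          bool[] share_dims_and_dtype = {}
// generated C++ parameter: std::vector<bool> share_dims_and_dtype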
@@ -3258,6 +3258,10 @@ void OperatorWithKernel::BuildPhiKernelContext(
phi_kernel_context->EmplaceBackAttr(
PADDLE_GET_CONST(std::vector<int>, attr_iter->second));
break;
case phi::AttributeType::BOOLS:
phi_kernel_context->EmplaceBackAttr(
PADDLE_GET_CONST(std::vector<bool>, attr_iter->second));
break;
case phi::AttributeType::DATA_TYPE: {
auto data_type = framework::TransToPhiDataType(
static_cast<framework::proto::VarType::Type>(
......
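The new BOOLS case is the bridge between the fluid attribute and the phi kernel. A hedged sketch of the round trip, using only names that appear elsewhere in this diff:

// Op side:  AddAttr<std::vector<bool>>("share_dims_and_dtype", ...) declares the
//           attribute on the fluid ShareBufferOp (see the op definition below).
// Bridge:   BuildPhiKernelContext matches phi::AttributeType::BOOLS and forwards
//           PADDLE_GET_CONST(std::vector<bool>, attr_iter->second) via EmplaceBackAttr.
// Kernel:   phi::ShareBufferKernel receives the value as const std::vector<bool>&.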
@@ -207,7 +207,7 @@ elseif(WITH_ROCM)
else()
cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
endif()
cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context share_buffer_op)
cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context generated_op)
cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})
if (WITH_PYTHON)
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/share_buffer_op.h"
namespace paddle {
namespace operators {
class ShareBufferOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
// dtype is not important
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.GetPlace());
}
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const framework::OpKernelType& expected_kernel_type) const override {
return expected_kernel_type;
}
};
class ShareBufferOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor), The input tensors of share buffer op")
.AsDuplicable();
AddOutput("Out", "(Tensor), The output tensors of share buffer op")
.AsDuplicable();
AddOutput("XOut",
"(Tensor), The output tensors which are the same as X. It is "
"used to build the graph dependency")
.AsDuplicable();
AddAttr<std::vector<bool>>("share_dims_and_dtype",
"Whether to share dims and data type")
.SetDefault(std::vector<bool>());
AddComment(
R"DOC(Operator used to perform inplace memory reuse. It should be not exposed to Python APIs.)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(share_buffer, ops::ShareBufferOp, ops::ShareBufferOpMaker);
// dtype is not important
REGISTER_OP_CPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#ifdef PADDLE_WITH_ASCEND_CL
REGISTER_OP_NPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_OP_XPU_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace operators {
template <typename T>
class ShareBufferOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
size_t n = inputs.size();
PADDLE_ENFORCE_EQ(
n,
outputs.size(),
platform::errors::PermissionDenied("Variable number not match."));
const auto &share_dims_and_dtype =
ctx.Attr<std::vector<bool>>("share_dims_and_dtype");
if (!share_dims_and_dtype.empty()) {
PADDLE_ENFORCE_EQ(n,
share_dims_and_dtype.size(),
platform::errors::PermissionDenied(
                            "The size of attribute share_dims_and_dtype does "
                            "not match the number of input variables."));
}
const std::vector<std::string> *input_args = nullptr,
*output_args = nullptr;
if (VLOG_IS_ON(10)) {
input_args = &ctx.GetOp().Inputs("X");
output_args = &ctx.GetOp().Outputs("Out");
}
for (size_t i = 0; i < n; ++i) {
if (inputs[i] == nullptr || outputs[i] == nullptr) {
continue;
}
outputs[i]->ShareBufferWith(*inputs[i]);
VLOG(10) << "Share tensor buffer " << (*input_args)[i] << " -> "
<< (*output_args)[i];
if (!share_dims_and_dtype.empty() && share_dims_and_dtype[i]) {
outputs[i]->Resize(inputs[i]->dims());
outputs[i]->ShareDataTypeWith(*inputs[i]);
}
}
}
};
} // namespace operators
} // namespace paddle
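The core operation of this kernel is aliasing, not copying. A minimal sketch of the phi::DenseTensor calls involved, assuming src is an allocated tensor and share_this_one is a hypothetical stand-in for the per-tensor attribute value:

phi::DenseTensor dst;
dst.ShareBufferWith(src);      // dst now aliases src's allocation; no data copy
if (share_this_one) {
  dst.Resize(src.dims());      // optionally propagate the shape
  dst.ShareDataTypeWith(src);  // optionally propagate the dtype
}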
@@ -18,8 +18,15 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP(share_buffer);
USE_OP_ITSELF(share_buffer);
PD_DECLARE_KERNEL(share_buffer, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(share_buffer, GPU, ALL_LAYOUT);
#endif
namespace paddle {
namespace framework {
......
@@ -166,6 +166,7 @@ class BaseAPI:
'float[]': 'const std::vector<float>&',
'double': 'double',
'bool': 'bool',
'bool[]': 'const std::vector<bool>&',
'str': 'const std::string&',
'str[]': 'const std::vector<std::string>&',
'Place': 'const Place&',
......
@@ -1019,6 +1019,13 @@
extra :
attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"]
- op : share_buffer
inputs :
x : X
outputs :
out : Out
xout : XOut
- op : shuffle_channel
backward : shuffle_channel_grad
extra :
......
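This op_compat entry maps the phi-side argument names onto the legacy fluid slots, so the new signature (x, out, xout) lines up with the op's original inputs and outputs. A sketch of the correspondence:

// phi argument -> fluid op slot
//   x    -> X     (duplicable input list)
//   out  -> Out   (duplicable output list)
//   xout -> XOut  (output list mirroring X, used to build the graph dependency)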
@@ -952,3 +952,11 @@
kernel :
func : unfold
backward : unfold_grad
- op : share_buffer
args : (Tensor[] x, bool[] share_dims_and_dtype={})
output : Tensor[](out){x.size()}, Tensor[](xout){x.size()}
infer_meta :
func : ShareBufferInferMeta
kernel :
func : share_buffer
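Here the {x.size()} suffix sizes both output lists to match the input list, and bool[] ... = {} defaults the attribute to empty. Combined with the type mappings added earlier, this entry resolves to the phi kernel signature declared at the end of this diff:

template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
                       const std::vector<const DenseTensor *> &x,
                       const std::vector<bool> &share_dims_and_dtype,
                       std::vector<DenseTensor *> out,
                       std::vector<DenseTensor *> xout);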
@@ -2668,6 +2668,30 @@ void UnchangedMultiInferMeta(const std::vector<const MetaTensor*>& x,
}
}
void ShareBufferInferMeta(const std::vector<const MetaTensor*>& xs,
const std::vector<bool>& share_dims_and_dtype,
std::vector<MetaTensor*> outs,
std::vector<MetaTensor*> xouts) {
if (share_dims_and_dtype.empty()) {
return;
}
PADDLE_ENFORCE_EQ(xs.size(),
share_dims_and_dtype.size(),
phi::errors::PermissionDenied(
"The input(X) and attribute share_dims_and_dtype "
"should have the same size, but got size of input(X) "
"is %d and size of share_dims_and_dtype is %d.",
xs.size(),
share_dims_and_dtype.size()));
for (size_t i = 0; i < xs.size(); ++i) {
if (share_dims_and_dtype[i]) {
outs[i]->set_dims(xs[i]->dims());
outs[i]->set_dtype(xs[i]->dtype());
}
}
}
void UpdateLossScalingInferMeta(const std::vector<const MetaTensor*>& xs,
const MetaTensor& found_infinite,
const MetaTensor& prev_loss_scaling,
......
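A worked example of the propagation rule above, with hypothetical shapes and dtypes:

// xs                   = { x0: dims [2, 3], float32;  x1: dims [4], int64 }
// share_dims_and_dtype = { true, false }
// => outs[0] gets dims [2, 3] and dtype float32; outs[1] is left unchanged.
// The buffer aliasing itself happens in the kernel, not at infer-meta time.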
@@ -479,6 +479,11 @@ void StackInferMeta(const std::vector<const MetaTensor*>& x,
void UnchangedMultiInferMeta(const std::vector<const MetaTensor*>& x,
std::vector<MetaTensor*> out);
void ShareBufferInferMeta(const std::vector<const MetaTensor*>& x,
const std::vector<bool>& share_dims_and_dtype,
std::vector<MetaTensor*> out,
std::vector<MetaTensor*> xout);
void UpdateLossScalingInferMeta(const std::vector<const MetaTensor*>& xs,
const MetaTensor& found_infinite,
const MetaTensor& prev_loss_scaling,
......
@@ -14,6 +14,8 @@
#pragma once
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/share_buffer_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h"
namespace phi {
template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
const std::vector<const DenseTensor *> &x,
const std::vector<bool> &share_dims_and_dtype,
std::vector<DenseTensor *> out,
std::vector<DenseTensor *> xout) {
PADDLE_ENFORCE_EQ(
x.size(),
out.size(),
phi::errors::PermissionDenied(
"The input(X) and Output(out) should have the same size, but got "
"size of Input(X) is %d and size of Output(out) is %d.",
x.size(),
out.size()));
for (size_t i = 0; i < x.size(); ++i) {
if (x[i] == nullptr || out[i] == nullptr) {
continue;
}
out[i]->ShareBufferWith(*x[i]);
VLOG(10) << "Share tensor buffer ";
}
}
} // namespace phi
PD_REGISTER_GENERAL_KERNEL(share_buffer,
CPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::CPUContext>,
ALL_DTYPE) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_GENERAL_KERNEL(share_buffer,
GPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::GPUContext>,
ALL_DTYPE) {}
#endif
#ifdef PADDLE_WITH_XPU
PD_REGISTER_GENERAL_KERNEL(share_buffer,
XPU,
ALL_LAYOUT,
phi::ShareBufferKernel<phi::XPUContext>,
ALL_DTYPE) {}
#endif
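Each PD_REGISTER_GENERAL_KERNEL above pairs with a consumer-side declaration: a translation unit that links against the kernel declares it rather than re-registering it, exactly as the updated C++ test earlier in this diff does:

#include "paddle/phi/core/kernel_registry.h"

PD_DECLARE_KERNEL(share_buffer, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(share_buffer, GPU, ALL_LAYOUT);
#endif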
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,7 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/share_buffer_op.h"
#pragma once
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(share_buffer, ops::ShareBufferOpKernel<float>);
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename Context>
void ShareBufferKernel(const Context &dev_ctx,
const std::vector<const DenseTensor *> &x,
const std::vector<bool> &share_dims_and_dtype,
std::vector<DenseTensor *> out,
std::vector<DenseTensor *> xout);
} // namespace phi