未验证 提交 5de576b0 编写于 作者: Z zhiboniu 提交者: GitHub

add api fill_diagonal_inplace (#34460)

上级 16146088
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_diagonal_op.h"
namespace paddle {
namespace operators {
int64_t CalStride(framework::DDim dim) {
int rank = dim.size();
int64_t dimsum = 1;
int64_t strides = 0;
for (int i = rank - 1; i >= 0; i--) {
strides += dimsum;
dimsum *= dim[i];
}
return strides;
}
class FillIDiagonalOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddComment(R"DOC(Fill replace operator
Fill the diagonal of an tensor with 'value'.
)DOC");
AddInput("X", "(Tensor) The input tensor.");
AddOutput("Out",
"Tensor, the output tensor, with the same shape and data type "
"as input(x)");
AddAttr<float>(
"value",
"The float values of tensor, whose dim is one, and no need of grad")
.SetDefault(0);
AddAttr<bool>("wrap",
"the diagonal 'wrapped' after N columns for tall matrices")
.SetDefault(false);
AddAttr<int>("offset",
"offset of diagonal, zero means no offset, positive means "
"offset to up-right corner; negtive means offset to "
"bottom-left corner")
.SetDefault(0);
}
};
class FillIDiagonalOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *context) const override {
OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "FillIDiagonal");
OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "FillIDiagonal");
auto x_dims = context->GetInputDim("X");
context->SetOutputDim("Out", x_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
}
};
class FillIDiagonalOpVarTypeInference : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {
auto var_type = ctx->GetInputType("X", 0);
auto data_type = ctx->GetInputDataType("X", 0);
ctx->SetOutputType("Out", var_type, framework::ALL_ELEMENTS);
ctx->SetOutputDataType("Out", data_type, framework::ALL_ELEMENTS);
}
};
template <typename T>
class FillIDiagonalKernel : public framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext &ctx) const override {
auto fill_val = ctx.template Attr<float>("value");
auto *out = ctx.Output<framework::Tensor>("Out");
auto offset = ctx.Attr<int>("offset");
auto wrap = ctx.Attr<bool>("wrap");
auto *xin = ctx.Input<framework::Tensor>("X");
T temp_var = static_cast<T>(fill_val);
T *out_data = out->mutable_data<T>(ctx.GetPlace());
framework::TensorCopy(*xin, ctx.GetPlace(), out);
auto out_dims = out->dims();
auto strides = CalStride(out_dims);
auto size = out->numel();
// The wrap mode supported only the dims equels to 2; In wrap mode, the
// value will be filled in cycles
if (!wrap) {
size = std::min(size, out_dims[1] * out_dims[1]);
}
for (int64_t i = offset; i < size; i += strides) {
out_data[i] = temp_var;
}
}
};
class FillIDiagonalGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input",
"Out@GRAD", "mul");
auto x_dims = ctx->GetInputDim(framework::GradVarName("Out"));
auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
}
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
// Note: don't get data type from ctx.Input<framework::Tensor>("Input");
auto dtype =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"))->type();
return framework::OpKernelType(dtype, ctx.GetPlace());
}
};
template <typename T>
class FillIDiagonalGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> retv) const override {
retv->SetType("fill_diagonal_grad");
retv->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
retv->SetAttrMap(this->Attrs());
}
};
template <typename T>
class FillIDiagonalGradKernel : public framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext &ctx) const override {
auto *dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto *dout = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto offset = ctx.Attr<int>("offset");
auto wrap = ctx.Attr<bool>("wrap");
if (dx) {
auto *data = dx->mutable_data<T>(ctx.GetPlace());
framework::TensorCopy(*dout, ctx.GetPlace(), dx);
auto dx_dims = dx->dims();
auto strides = CalStride(dx_dims);
auto size = dx->numel();
auto wrapsize = std::min(size, dx_dims[1] * dx_dims[1]);
// The wrap mode supported only the dims equels to 2; In wrap mode, the
// value will be filled in cycles
if (wrap) {
wrapsize = size;
}
for (int64_t i = offset; i < wrapsize; i += strides) {
data[i] = T(0);
}
}
}
};
DECLARE_INPLACE_OP_INFERER(FillIDiagonalOpInplaceInferer, {"X", "Out"});
DECLARE_INPLACE_OP_INFERER(FillIDiagonalGradOpInplaceInferer,
{framework::GradVarName("Out"),
framework::GradVarName("X")});
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(fill_diagonal, ops::FillIDiagonalOp,
ops::FillIDiagonalOpMaker,
ops::FillIDiagonalOpVarTypeInference,
ops::FillIDiagonalGradOpMaker<paddle::framework::OpDesc>,
ops::FillIDiagonalGradOpMaker<paddle::imperative::OpBase>,
ops::FillIDiagonalOpInplaceInferer);
REGISTER_OPERATOR(fill_diagonal_grad, ops::FillIDiagonalGradOp,
ops::FillIDiagonalGradOpInplaceInferer);
REGISTER_OP_CPU_KERNEL(fill_diagonal, ops::FillIDiagonalKernel<float>,
ops::FillIDiagonalKernel<double>,
ops::FillIDiagonalKernel<int64_t>,
ops::FillIDiagonalKernel<int>,
ops::FillIDiagonalKernel<paddle::platform::float16>,
ops::FillIDiagonalKernel<bool>);
REGISTER_OP_CPU_KERNEL(fill_diagonal_grad, ops::FillIDiagonalGradKernel<float>,
ops::FillIDiagonalGradKernel<double>,
ops::FillIDiagonalGradKernel<int64_t>,
ops::FillIDiagonalGradKernel<int>,
ops::FillIDiagonalGradKernel<paddle::platform::float16>,
ops::FillIDiagonalGradKernel<bool>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_diagonal_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using CUDADeviceContext = paddle::platform::CUDADeviceContext;
template <typename T>
__global__ void fill_constant_kernel(const int64_t featuresize, T* in_data,
int64_t strides, int offset, T fillvar) {
for (int64_t idx = blockIdx.x * featuresize + threadIdx.x;
idx * strides + offset < (blockIdx.x + 1) * featuresize;
idx += blockDim.x) {
in_data[idx * strides + offset] = fillvar;
}
}
template <typename T>
class FillIDiagonalCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef __HIPCC__
const int64_t kMaxBlockDim = 256;
#else
const int64_t kMaxBlockDim = 512;
#endif
auto* out = ctx.Output<Tensor>("Out");
auto offset = ctx.Attr<int>("offset");
auto wrap = ctx.Attr<bool>("wrap");
auto* xin = ctx.Input<framework::Tensor>("X");
framework::TensorCopy(*xin, ctx.GetPlace(), out);
T* out_data = out->mutable_data<T>(ctx.GetPlace());
auto fill_val = static_cast<T>(ctx.template Attr<float>("value"));
T temp_var = static_cast<T>(fill_val);
auto size = out->numel();
auto out_dims = out->dims();
auto strides = CalStride(out_dims);
// The wrap mode supported only the dims equels to 2; In wrap mode, the
// value will be filled in cycles
if (!wrap) {
size = std::min(size, out_dims[1] * out_dims[1]);
}
int64_t kBlockDim = std::min(int64_t(size / strides), kMaxBlockDim);
fill_constant_kernel<T><<<1, kBlockDim, 0>>>(size, out_data, strides,
offset, temp_var);
}
};
template <typename T>
class FillIDiagonalGradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef __HIPCC__
const int64_t kMaxBlockDim = 256;
#else
const int64_t kMaxBlockDim = 512;
#endif
auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto* in_data = dx->mutable_data<T>(ctx.GetPlace());
auto* dout = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto offset = ctx.Attr<int>("offset");
auto wrap = ctx.Attr<bool>("wrap");
framework::TensorCopy(*dout, ctx.GetPlace(), dx);
auto size = dx->numel();
auto out_dims = dx->dims();
auto strides = CalStride(out_dims);
auto wrapsize = std::min(size, out_dims[1] * out_dims[1]);
// The wrap mode supported only the dims equels to 2; In wrap mode, the
// value will be filled in cycles
if (wrap) {
wrapsize = size;
}
int64_t kBlockDim = std::min(int64_t(size), kMaxBlockDim);
fill_constant_kernel<T><<<1, kBlockDim, 0>>>(wrapsize, in_data, strides,
offset, T(0));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(fill_diagonal, ops::FillIDiagonalCUDAKernel<float>,
ops::FillIDiagonalCUDAKernel<double>,
ops::FillIDiagonalCUDAKernel<plat::float16>,
ops::FillIDiagonalCUDAKernel<int>,
ops::FillIDiagonalCUDAKernel<int64_t>,
ops::FillIDiagonalCUDAKernel<bool>);
REGISTER_OP_CUDA_KERNEL(fill_diagonal_grad,
ops::FillIDiagonalGradCUDAKernel<float>,
ops::FillIDiagonalGradCUDAKernel<double>,
ops::FillIDiagonalGradCUDAKernel<int>,
ops::FillIDiagonalGradCUDAKernel<int64_t>,
ops::FillIDiagonalGradCUDAKernel<plat::float16>,
ops::FillIDiagonalGradCUDAKernel<bool>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
int64_t CalStride(framework::DDim dim);
} // namespace operators
} // namespace paddle
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import unittest
import numpy as np
import six
import paddle
class TensorFillDiagonal_Test(unittest.TestCase):
def test_dim2_normal(self):
expected_np = np.array(
[[1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype('float32')
expected_grad = np.array(
[[0, 1, 1], [1, 0, 1], [1, 1, 0]]).astype('float32')
typelist = ['float32', 'float64', 'int32', 'int64']
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for idx, p in enumerate(places):
if idx == 0:
paddle.set_device('cpu')
else:
paddle.set_device('gpu')
for dtype in typelist:
x = paddle.ones((3, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
self.assertEqual(
(y.numpy().astype('float32') == expected_np).all(), True)
self.assertEqual(
(y.grad.numpy().astype('float32') == expected_grad).all(),
True)
def test_bool(self):
expected_np = np.array(
[[False, True, True], [True, False, True], [True, True, False]])
typelist = ['bool']
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for idx, p in enumerate(places):
if idx == 0:
paddle.set_device('cpu')
else:
paddle.set_device('gpu')
for dtype in typelist:
x = paddle.ones((3, 3), dtype=dtype)
x.stop_gradient = True
x.fill_diagonal_(0, offset=0, wrap=True)
self.assertEqual((x.numpy() == expected_np).all(), True)
def test_dim2_unnormal_wrap(self):
expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2],
[1, 2, 2], [2, 1, 2],
[2, 2, 1]]).astype('float32')
expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1],
[0, 1, 1], [1, 0, 1],
[1, 1, 0]]).astype('float32')
typelist = ['float32', 'float64', 'int32', 'int64']
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for idx, p in enumerate(places):
if idx == 0:
paddle.set_device('cpu')
else:
paddle.set_device('gpu')
for dtype in typelist:
x = paddle.ones((7, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
self.assertEqual(
(y.numpy().astype('float32') == expected_np).all(), True)
self.assertEqual(
(y.grad.numpy().astype('float32') == expected_grad).all(),
True)
def test_dim2_unnormal_unwrap(self):
expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2],
[2, 2, 2], [2, 2, 2],
[2, 2, 2]]).astype('float32')
expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1],
[1, 1, 1], [1, 1, 1],
[1, 1, 1]]).astype('float32')
typelist = ['float32', 'float64', 'int32', 'int64']
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for idx, p in enumerate(places):
if idx == 0:
paddle.set_device('cpu')
else:
paddle.set_device('gpu')
for dtype in typelist:
x = paddle.ones((7, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.fill_diagonal_(1, offset=0, wrap=False)
loss = y.sum()
loss.backward()
self.assertEqual(
(y.numpy().astype('float32') == expected_np).all(), True)
self.assertEqual(
(y.grad.numpy().astype('float32') == expected_grad).all(),
True)
def test_dim_larger2_normal(self):
expected_np = np.array([[[1, 2, 2], [2, 2, 2], [2, 2, 2]], [[2, 2, 2], [
2, 1, 2
], [2, 2, 2]], [[2, 2, 2], [2, 2, 2], [2, 2, 1]]]).astype('float32')
expected_grad = np.array(
[[[0, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 0, 1],
[1, 1, 1]],
[[1, 1, 1], [1, 1, 1], [1, 1, 0]]]).astype('float32')
typelist = ['float32', 'float64', 'int32', 'int64']
places = [fluid.CPUPlace()]
if fluid.core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for idx, p in enumerate(places):
if idx == 0:
paddle.set_device('cpu')
else:
paddle.set_device('gpu')
for dtype in typelist:
x = paddle.ones((3, 3, 3), dtype=dtype)
x.stop_gradient = False
y = x * 2
y.fill_diagonal_(1, offset=0, wrap=True)
loss = y.sum()
loss.backward()
self.assertEqual(
(y.numpy().astype('float32') == expected_np).all(), True)
self.assertEqual(
(y.grad.numpy().astype('float32') == expected_grad).all(),
True)
if __name__ == '__main__':
unittest.main()
...@@ -37,6 +37,55 @@ from paddle import _C_ops ...@@ -37,6 +37,55 @@ from paddle import _C_ops
__all__ = [] __all__ = []
@dygraph_only
def fill_diagonal_(x, value, offset=0, wrap=False, name=None):
"""
**Notes**:
**This API is ONLY available in Dygraph mode**
This function fill the value into the x Tensor's diagonal inplace.
Args:
x(Tensor): ``x`` is the original Tensor
value(Scale): ``value`` is the value to filled in x
offset(int,optional): the offset to the main diagonal. Default: 0 (main diagonal).
wrap(bool,optional): the diagonal 'wrapped' after N columns for tall matrices.
name(str,optional): Name for the operation (optional, default is None)
Returns:
Tensor: Tensor with diagonal filled with value.
Returns type:
dtype is same as x Tensor
Examples:
.. code-block:: python
import paddle
x = paddle.ones((4, 3)) * 2
x.fill_diagonal_(1.0)
print(x.tolist()) #[[1.0, 2.0, 2.0], [2.0, 1.0, 2.0], [2.0, 2.0, 1.0], [2.0, 2.0, 2.0]]
"""
helper = LayerHelper("fill_diagonal_", **locals())
check_type(x, 'X', (Variable), 'fill_diagonal_')
dtype = helper.input_dtype('x')
check_dtype(dtype, 'X',
['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
'fill_diagonal_')
check_type(value, 'value', (bool, int, float), 'fill_diagonal_')
check_type(wrap, 'wrap', (bool), 'fill_diagonal_')
inshape = x.shape
inshapeset = set(inshape)
assert len(inshape) >= 2, ('Tensor dims should >= 2 in fill_diagonal_ API')
if len(inshape) > 2:
assert len(inshapeset) == 1, (
'Tensor dims should be equal while input dims > 2 in fill_diagonal_ API'
)
if len(inshape) == 2:
return core.ops.fill_diagonal_(x, 'value', value, 'offset', offset,
'wrap', wrap)
return core.ops.fill_diagonal_(x, 'value', value, 'offset', offset, 'wrap',
True)
setattr(core.VarBase, 'fill_diagonal_', fill_diagonal_)
@dygraph_only @dygraph_only
def tolist(x): def tolist(x):
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册