未验证 提交 55d31980 编写于 作者: Z zhangkaihuo 提交者: GitHub

[Sparse] add_coo_dense (#46322)

* for add_bias
上级 0f9dde43
...@@ -250,6 +250,9 @@ phi::SelectedRows* SetSelectedRowsKernelOutput(Tensor* out) { ...@@ -250,6 +250,9 @@ phi::SelectedRows* SetSelectedRowsKernelOutput(Tensor* out) {
} }
phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) { phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) {
if (!out) {
return nullptr;
}
if (!out->initialized()) { if (!out->initialized()) {
if (type == TensorType::SPARSE_COO) { if (type == TensorType::SPARSE_COO) {
auto sparse_tensor = std::make_shared<phi::SparseCooTensor>( auto sparse_tensor = std::make_shared<phi::SparseCooTensor>(
......
...@@ -36,11 +36,12 @@ ...@@ -36,11 +36,12 @@
args : (Tensor x, Tensor y, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
param : [x, y] param : [x, y]
kernel : kernel :
func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr} add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
add_coo_dense_grad{sparse_coo, dense, sparse_coo -> sparse_coo, dense}
- backward_op : addmm_grad - backward_op : addmm_grad
forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out) forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out)
...@@ -104,7 +105,7 @@ ...@@ -104,7 +105,7 @@
args : (Tensor x, Tensor out_grad, DataType value_dtype) args : (Tensor x, Tensor out_grad, DataType value_dtype)
output : Tensor(x_grad) output : Tensor(x_grad)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
param: [x] param: [x]
kernel : kernel :
func : cast_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, func : cast_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
...@@ -126,7 +127,7 @@ ...@@ -126,7 +127,7 @@
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
param : [x, y] param : [x, y]
kernel : kernel :
func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
...@@ -209,7 +210,7 @@ ...@@ -209,7 +210,7 @@
args : (Tensor x, Tensor y, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
param : [x, y] param : [x, y]
kernel : kernel :
func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
...@@ -337,7 +338,7 @@ ...@@ -337,7 +338,7 @@
args : (Tensor x, Tensor y, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : GeneralBinaryGradInferMeta
param : [x, y] param : [x, y]
kernel : kernel :
func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
...@@ -399,7 +400,7 @@ ...@@ -399,7 +400,7 @@
args: (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad) args: (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad)
output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad) output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad)
infer_meta : infer_meta :
func : sparse::FusedAttentionGradInferMeta func : sparse::FusedAttentionGradInferMeta
kernel : kernel :
func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense} func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense}
layout : softmax layout : softmax
......
...@@ -35,10 +35,11 @@ ...@@ -35,10 +35,11 @@
args : (Tensor x, Tensor y) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : ElementwiseInferMeta func : ElementwiseInferMeta
kernel : kernel :
func : add_coo_coo{sparse_coo, sparse_coo -> sparse_coo}, func : add_coo_coo{sparse_coo, sparse_coo -> sparse_coo},
add_csr_csr{sparse_csr, sparse_csr -> sparse_csr} add_csr_csr{sparse_csr, sparse_csr -> sparse_csr}
add_coo_dense{sparse_coo, dense -> sparse_coo},
layout : x layout : x
backward : add_grad backward : add_grad
...@@ -114,7 +115,7 @@ ...@@ -114,7 +115,7 @@
args : (Tensor x, Tensor y) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : ElementwiseInferMeta func : ElementwiseInferMeta
kernel : kernel :
func : divide_coo_coo{sparse_coo, sparse_coo -> sparse_coo}, func : divide_coo_coo{sparse_coo, sparse_coo -> sparse_coo},
divide_csr_csr{sparse_csr, sparse_csr -> sparse_csr} divide_csr_csr{sparse_csr, sparse_csr -> sparse_csr}
......
...@@ -415,3 +415,14 @@ PD_REGISTER_KERNEL(divide_coo_coo_grad, ...@@ -415,3 +415,14 @@ PD_REGISTER_KERNEL(divide_coo_coo_grad,
kernel->InputAt(2).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(2).SetDataLayout(phi::DataLayout::SPARSE_COO);
kernel->InputAt(3).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(3).SetDataLayout(phi::DataLayout::SPARSE_COO);
} }
PD_REGISTER_KERNEL(add_coo_dense_grad,
CPU,
ALL_LAYOUT,
phi::sparse::ElementWiseAddDenseGradKernel,
float,
double,
int,
int64_t) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
...@@ -156,6 +156,21 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, ...@@ -156,6 +156,21 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx,
"shape = [%s], Y's shape = [%s].", "shape = [%s], Y's shape = [%s].",
x.dims(), x.dims(),
y.dims())); y.dims()));
// temporary policy: for broadcast add
// TODO(zhangkaihuo): implement a correct function
const bool is_add = std::is_same<Functor, funcs::AddFunctor<T>>::value;
if (is_add && x.indices().numel() == y.indices().numel()) {
int compare_indices = memcmp(x.indices().data<IntT>(),
y.indices().data<IntT>(),
sizeof(IntT) * x.indices().numel());
if (compare_indices == 0) {
EmptyLikeCooKernel<T, Context>(dev_ctx, x, out);
phi::AddKernel<T, Context>(
dev_ctx, x.values(), y.values(), out->mutable_values());
return;
}
}
int64_t element_size = 1; int64_t element_size = 1;
for (auto j = 1; j < x.values().dims().size(); ++j) { for (auto j = 1; j < x.values().dims().size(); ++j) {
element_size *= x.values().dims()[j]; element_size *= x.values().dims()[j];
...@@ -435,3 +450,14 @@ PD_REGISTER_KERNEL(divide_coo_coo, ...@@ -435,3 +450,14 @@ PD_REGISTER_KERNEL(divide_coo_coo,
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
} }
PD_REGISTER_KERNEL(add_coo_dense,
CPU,
ALL_LAYOUT,
phi::sparse::ElementWiseAddDenseKernel,
float,
double,
int,
int64_t) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
...@@ -14,6 +14,9 @@ limitations under the License. */ ...@@ -14,6 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h"
...@@ -119,5 +122,27 @@ std::vector<SparseCooTensor> ElementWiseDivideCooGrad( ...@@ -119,5 +122,27 @@ std::vector<SparseCooTensor> ElementWiseDivideCooGrad(
return std::vector<SparseCooTensor>{dx, dy}; return std::vector<SparseCooTensor>{dx, dy};
} }
template <typename T, typename Context>
void ElementWiseAddDenseGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& y,
const SparseCooTensor& dout,
SparseCooTensor* dx,
DenseTensor* dy) {
DenseTensor* x_values_grad = nullptr;
DenseTensor* y_grad = nullptr;
if (dx) {
EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
x_values_grad = dx->mutable_values();
}
if (dy) {
*dy = phi::EmptyLike<T>(dev_ctx, y);
y_grad = dy;
}
phi::AddGradKernel<T, Context>(
dev_ctx, x.values(), y, dout.values(), -1, x_values_grad, y_grad);
}
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
...@@ -14,6 +14,10 @@ limitations under the License. */ ...@@ -14,6 +14,10 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/sparse/elementwise_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h"
...@@ -78,5 +82,21 @@ DEFINE_ELEMENTWISE_KERNEL_FUNC(Subtract) ...@@ -78,5 +82,21 @@ DEFINE_ELEMENTWISE_KERNEL_FUNC(Subtract)
DEFINE_ELEMENTWISE_KERNEL_FUNC(Multiply) DEFINE_ELEMENTWISE_KERNEL_FUNC(Multiply)
DEFINE_ELEMENTWISE_KERNEL_FUNC(Divide) DEFINE_ELEMENTWISE_KERNEL_FUNC(Divide)
template <typename T, typename Context>
void ElementWiseAddDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& y,
SparseCooTensor* out) {
// TODO(zhangkaiuo): to support universal sparse + dense
if (y.dims().size() == 1 && y.dims()[0] == x.dims()[x.dims().size() - 1]) {
EmptyLikeCooKernel<T, Context>(dev_ctx, x, out);
phi::AddKernel<T, Context>(dev_ctx, x.values(), y, out->mutable_values());
out->SetIndicesDict(x.GetIndicesDict());
} else {
PADDLE_THROW(
errors::Unimplemented("Not support Sparse + Dense in GPU mode"));
}
}
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
...@@ -15,6 +15,9 @@ limitations under the License. */ ...@@ -15,6 +15,9 @@ limitations under the License. */
#include "paddle/phi/kernels/sparse/elementwise_grad_kernel.h" #include "paddle/phi/kernels/sparse/elementwise_grad_kernel.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_grad_base.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h" #include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace phi { namespace phi {
...@@ -54,3 +57,15 @@ PD_REGISTER_KERNEL(add_coo_coo_grad, ...@@ -54,3 +57,15 @@ PD_REGISTER_KERNEL(add_coo_coo_grad,
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
} }
PD_REGISTER_KERNEL(add_coo_dense_grad,
GPU,
ALL_LAYOUT,
phi::sparse::ElementWiseAddDenseGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
...@@ -31,6 +31,7 @@ void ElementWiseAddCooGPUKernel(const GPUContext& dev_ctx, ...@@ -31,6 +31,7 @@ void ElementWiseAddCooGPUKernel(const GPUContext& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const SparseCooTensor& y, const SparseCooTensor& y,
SparseCooTensor* out) { SparseCooTensor* out) {
// TODO(zhangkaiuo): to support universal sparse + sparse
const auto& x_indices = x.indices(); const auto& x_indices = x.indices();
const auto& y_indices = y.indices(); const auto& y_indices = y.indices();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -57,6 +58,7 @@ void ElementWiseAddCooGPUKernel(const GPUContext& dev_ctx, ...@@ -57,6 +58,7 @@ void ElementWiseAddCooGPUKernel(const GPUContext& dev_ctx,
EmptyLikeCooKernel<T, GPUContext>(dev_ctx, x, out); EmptyLikeCooKernel<T, GPUContext>(dev_ctx, x, out);
phi::AddKernel<T, GPUContext>( phi::AddKernel<T, GPUContext>(
dev_ctx, x.values(), y.values(), out->mutable_values()); dev_ctx, x.values(), y.values(), out->mutable_values());
out->SetIndicesDict(x.GetIndicesDict());
} }
template <typename T, typename Context> template <typename T, typename Context>
...@@ -86,3 +88,15 @@ PD_REGISTER_KERNEL(add_coo_coo, ...@@ -86,3 +88,15 @@ PD_REGISTER_KERNEL(add_coo_coo,
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
} }
PD_REGISTER_KERNEL(add_coo_dense,
GPU,
ALL_LAYOUT,
phi::sparse::ElementWiseAddDenseKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
...@@ -163,6 +163,32 @@ class TestSparseElementWiseAPI(unittest.TestCase): ...@@ -163,6 +163,32 @@ class TestSparseElementWiseAPI(unittest.TestCase):
np.testing.assert_allclose(sp_b.grad.values().numpy(), np.testing.assert_allclose(sp_b.grad.values().numpy(),
values2.grad.numpy()) values2.grad.numpy())
def test_add_bias(self):
indices_data = [[0, 1], [0, 3]]
values_data = [[1.0, 1.0], [2.0, 2.0]]
shape = [2, 4, 2]
sp_a = sparse.sparse_coo_tensor(indices_data,
values_data,
shape,
stop_gradient=False)
bias_values = [1.0, 2.0]
values1 = paddle.to_tensor(values_data, stop_gradient=False)
values2 = paddle.to_tensor(bias_values, stop_gradient=False)
values3 = paddle.to_tensor(bias_values, stop_gradient=False)
#c.values() = a.values() + b
sp_c = sparse.add(sp_a, values2)
sp_c.backward()
ref_c = values1 + values3
ref_c.backward()
np.testing.assert_allclose(sp_c.values().numpy(), ref_c.numpy())
np.testing.assert_allclose(sp_a.grad.values().numpy(),
values1.grad.numpy())
np.testing.assert_allclose(values2.grad.numpy(), values3.grad.numpy())
if __name__ == "__main__": if __name__ == "__main__":
paddle.device.set_device('cpu') paddle.device.set_device('cpu')
......
...@@ -253,7 +253,7 @@ def add(x, y, name=None): ...@@ -253,7 +253,7 @@ def add(x, y, name=None):
""" """
if y.dtype != x.dtype: if y.dtype != x.dtype:
y = _C_ops.sparse_cast(y, None, x.dtype) y = cast(y, None, x.dtype)
return _C_ops.sparse_add(x, y) return _C_ops.sparse_add(x, y)
......
...@@ -18,6 +18,8 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode ...@@ -18,6 +18,8 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode
from paddle.fluid.layers.utils import convert_to_list from paddle.fluid.layers.utils import convert_to_list
from paddle.fluid.layers.nn import elementwise_add from paddle.fluid.layers.nn import elementwise_add
from ...creation import sparse_coo_tensor from ...creation import sparse_coo_tensor
from ...binary import add
from paddle.tensor import arange
from paddle.nn.functional.conv import _update_padding_nd from paddle.nn.functional.conv import _update_padding_nd
...@@ -67,12 +69,7 @@ def _conv3d(x, ...@@ -67,12 +69,7 @@ def _conv3d(x,
groups, subm, groups, subm,
key if key is not None else "") key if key is not None else "")
if bias is not None: if bias is not None:
values = pre_bias.values() return add(pre_bias, bias)
add_bias = elementwise_add(values, bias, axis=1)
return sparse_coo_tensor(pre_bias.indices(),
add_bias,
shape=pre_bias.shape,
stop_gradient=pre_bias.stop_gradient)
else: else:
return pre_bias return pre_bias
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册