Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
44d8c6ed
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
44d8c6ed
编写于
4月 19, 2022
作者:
Z
zhangkaihuo
提交者:
GitHub
4月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add kernel sparse_mask_helper; sparse_coo_tensor_grad (#41586) (#41902)
cherry-pick the PR#41586 to release/2.3
上级
dab7dfbf
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
476 addition
and
21 deletion
+476
-21
paddle/phi/kernels/funcs/sparse/common_shape.h
paddle/phi/kernels/funcs/sparse/common_shape.h
+39
-0
paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc
paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc
+90
-11
paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
+12
-0
paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu
paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu
+165
-1
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
+12
-0
paddle/phi/kernels/sparse/sparse_mask_kernel.h
paddle/phi/kernels/sparse/sparse_mask_kernel.h
+6
-0
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
+25
-0
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h
+9
-0
paddle/phi/kernels/sparse/sparse_utils_kernel.h
paddle/phi/kernels/sparse/sparse_utils_kernel.h
+12
-0
python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
+58
-6
python/paddle/sparse/creation.py
python/paddle/sparse/creation.py
+33
-3
python/paddle/utils/code_gen/sparse_api.yaml
python/paddle/utils/code_gen/sparse_api.yaml
+8
-0
python/paddle/utils/code_gen/sparse_bw_api.yaml
python/paddle/utils/code_gen/sparse_bw_api.yaml
+7
-0
未找到文件。
paddle/phi/kernels/funcs/sparse/common_shape.h
浏览文件 @
44d8c6ed
...
...
@@ -40,6 +40,45 @@ inline const DDim InferDenseDims(const DDim& x_dims,
return
values_dims
;
}
/**
 * @brief Collapse the coordinate of the i-th entry into one linear index.
 *
 * The result is the sum, over j in [0, sparse_dim), of
 * indices[j * non_zero_num + i] * sparse_offsets[j].
 *
 * @param indices        coordinate buffer, read at j * non_zero_num + i
 * @param sparse_offsets multiplier applied to the coordinate of dimension j
 * @param non_zero_num   distance (in elements) between two dimensions in
 *                       `indices`
 * @param sparse_dim     number of dimensions that are accumulated
 * @param i              position of the entry to convert
 * @return the accumulated index, expressed in IntT
 */
template <typename IntT>
inline const IntT HOSTDEVICE IndicesToIndex(const IntT* indices,
                                            const IntT* sparse_offsets,
                                            const int64_t non_zero_num,
                                            const int64_t sparse_dim,
                                            const int i) {
  IntT flat = 0;
  for (IntT dim = 0; dim < sparse_dim; dim++) {
    // Start of the coordinates that belong to dimension `dim`.
    const IntT* dim_coords = indices + dim * non_zero_num;
    flat += dim_coords[i] * sparse_offsets[dim];
  }
  return flat;
}
/**
 * @brief Write the linear index of selected entries into `out`.
 *
 * Visits positions start, start + stride, start + 2 * stride, ... while the
 * position is below non_zero_num, and stores IndicesToIndex(...) for that
 * position at out[position].
 *
 * @param indices        coordinate buffer forwarded to IndicesToIndex
 * @param sparse_offsets per-dimension multipliers forwarded to IndicesToIndex
 * @param non_zero_num   exclusive upper bound of the visited positions
 * @param sparse_dim     number of dimensions forwarded to IndicesToIndex
 * @param start          first position handled by this call
 * @param stride         step between two handled positions
 * @param out            destination buffer, written at every visited position
 */
template <typename IntT>
inline void HOSTDEVICE FlattenIndices(const IntT* indices,
                                      const IntT* sparse_offsets,
                                      const int64_t non_zero_num,
                                      const int64_t sparse_dim,
                                      const int start,
                                      const int stride,
                                      IntT* out) {
  int pos = start;
  while (pos < non_zero_num) {
    out[pos] = IndicesToIndex(
        indices, sparse_offsets, non_zero_num, sparse_dim, pos);
    pos += stride;
  }
}
// 1. indices.dims().size() == 2
template
<
typename
IntT
>
inline
void
CalcOffsetsPerDim
(
const
DDim
&
dims
,
const
int64_t
sparse_dim
,
std
::
vector
<
IntT
>*
offsets
)
{
IntT
offset
=
1
;
for
(
IntT
i
=
sparse_dim
-
1
;
i
>=
0
;
i
--
)
{
(
*
offsets
)[
i
]
=
offset
;
offset
*=
dims
[
i
];
}
}
}
// namespace sparse
}
// namespace funcs
}
// namespace phi
paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc
浏览文件 @
44d8c6ed
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/sparse/common_shape.h"
#include "paddle/phi/api/ext/dispatch.h"
...
...
@@ -38,12 +39,6 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
const
DenseTensor
&
indices
=
mask
.
non_zero_indices
();
const
DenseTensor
&
values
=
mask
.
non_zero_elements
();
int
sparse_dim
=
indices
.
dims
().
size
();
std
::
vector
<
int64_t
>
sparse_offsets
(
sparse_dim
);
int64_t
offset
=
1
;
for
(
int
i
=
sparse_dim
-
1
;
i
>=
0
;
i
--
)
{
sparse_offsets
[
i
]
=
offset
;
offset
*=
dims
[
i
];
}
DenseTensor
out_indices
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
indices
);
DenseTensor
out_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
values
);
...
...
@@ -51,21 +46,25 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
// the out_indices is same as indices of mask
phi
::
Copy
(
dev_ctx
,
indices
,
dev_ctx
.
GetPlace
(),
false
,
&
out_indices
);
const
IntT
*
indices_ptr
=
indices
.
data
<
IntT
>
();
T
*
out_values_ptr
=
out_values
.
data
<
T
>
();
const
T
*
x_ptr
=
x
.
data
<
T
>
();
const
int64_t
non_zero_num
=
mask
.
nnz
();
auto
dims_2d
=
flatten_to_2d
(
dims
,
sparse_dim
);
const
int
cols
=
dims_2d
[
1
];
const
IntT
*
indices_ptr
=
indices
.
data
<
IntT
>
();
std
::
vector
<
IntT
>
out_indexs
(
non_zero_num
),
sparse_offsets
(
sparse_dim
);
phi
::
funcs
::
sparse
::
CalcOffsetsPerDim
<
IntT
>
(
dims
,
sparse_dim
,
&
sparse_offsets
);
for
(
int64_t
i
=
0
;
i
<
non_zero_num
;
i
++
)
{
int64_t
index
=
0
;
for
(
int
j
=
0
;
j
<
sparse_dim
;
j
++
)
{
index
+=
indices_ptr
[
j
*
non_zero_num
+
i
]
*
sparse_offsets
[
j
];
}
int64_t
index
=
phi
::
funcs
::
sparse
::
IndicesToIndex
<
IntT
>
(
indices_ptr
,
sparse_offsets
.
data
(),
non_zero_num
,
sparse_dim
,
i
);
memcpy
(
out_values_ptr
+
i
*
cols
,
x_ptr
+
index
*
cols
,
cols
*
sizeof
(
T
));
}
out
->
SetMember
(
out_indices
,
out_values
,
dims
,
true
);
}
...
...
@@ -85,6 +84,73 @@ void SparseMaskKernel(const Context& dev_ctx,
}));
}
/**
 * @brief CPU implementation: pick the value rows of `x` whose coordinates
 *        appear in `mask_indices`.
 *
 * Both coordinate sets are flattened to linear indices. A hash map from the
 * linear index of every entry of `x` to its position is built, and for each
 * column i of `mask_indices` that is found in the map the matching value row
 * of `x` is copied to row i of `out`. Rows without a match stay zero.
 *
 * @param dev_ctx      CPU context, used to allocate `out`
 * @param x            sparse tensor providing indices and values
 * @param mask_indices 2-D tensor; dims()[1] is taken as the number of
 *                     coordinates (one per column)
 * @param out          replaced by a zero-filled tensor shaped like
 *                     x.non_zero_elements(), then filled with the matches
 */
template <typename T, typename IntT>
void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
                               const SparseCooTensor& x,
                               const DenseTensor& mask_indices,
                               DenseTensor* out) {
  PADDLE_ENFORCE_EQ(
      mask_indices.dims().size(),
      2,
      phi::errors::InvalidArgument("the mask_indices must be 2-D tensor"));

  const int64_t sparse_dim = x.non_zero_indices().dims()[0];
  const int64_t mask_num = mask_indices.dims()[1];

  std::vector<IntT> sparse_offsets(sparse_dim), x_indexs(x.nnz()),
      mask_indexs(mask_num);
  phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
      x.dims(), sparse_dim, &sparse_offsets);

  phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
                                     sparse_offsets.data(),
                                     x.nnz(),
                                     sparse_dim,
                                     0,
                                     1,
                                     x_indexs.data());
  // The mask has its own column count. Passing x.nnz() here would use the
  // wrong row pitch for mask_indices and would write past mask_indexs when
  // x holds more entries than the mask.
  phi::funcs::sparse::FlattenIndices(mask_indices.data<IntT>(),
                                     sparse_offsets.data(),
                                     mask_num,
                                     sparse_dim,
                                     0,
                                     1,
                                     mask_indexs.data());

  // linear index of an entry of x -> position of that entry in x
  std::unordered_map<IntT, uint64_t> x_indexs_map;
  x_indexs_map.reserve(x_indexs.size());
  for (uint64_t i = 0; i < x_indexs.size(); i++) {
    x_indexs_map[x_indexs[i]] = i;
  }

  // NOTE(review): `out` gets as many rows as x has entries, while the loop
  // below writes one row per mask column. This looks like it expects
  // mask_num <= x.nnz() -- confirm against the callers.
  *out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
  T* out_ptr = out->data<T>();
  memset(out_ptr, 0, out->numel() * sizeof(T));

  // Number of elements in one value row: the product of the dense (trailing)
  // dimensions of x, or 1 when every dimension is sparse. The count of dense
  // dimensions is not the same thing and under-copies rows that are wider
  // than the number of dense dimensions.
  int64_t stride = 1;
  for (int d = static_cast<int>(sparse_dim); d < x.dims().size(); d++) {
    stride *= x.dims()[d];
  }

  const T* in_ptr = x.non_zero_elements().data<T>();
  // TODO(zhangkaihuo): multithreading can be used for acceleration
  for (uint64_t i = 0; i < mask_indexs.size(); i++) {
    auto iter = x_indexs_map.find(mask_indexs[i]);
    if (iter != x_indexs_map.end()) {
      memcpy(out_ptr + i * stride,
             in_ptr + iter->second * stride,
             stride * sizeof(T));
    }
  }
}
/**
 * @brief filter values from x.values() using mask_indices
 *
 * Entry point registered for the CPU backend. It dispatches on the dtype of
 * x.non_zero_indices() and forwards every argument unchanged to
 * SparseMaskHelperCPUKernel<T, data_t>.
 *
 * @param dev_ctx      device context handed to the implementation
 * @param x            sparse tensor whose values are filtered
 * @param mask_indices coordinates that select the values to keep
 * @param out          receives the filtered values
 */
template
<
typename
T
,
typename
Context
>
void
SparseMaskHelperKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
mask_indices
,
DenseTensor
*
out
)
{
// `data_t` is presumably the index type selected by the dispatch macro --
// TODO confirm in paddle/phi/api/ext/dispatch.h.
PD_DISPATCH_INTEGRAL_TYPES
(
x
.
non_zero_indices
().
dtype
(),
"SparseMaskHelperCPUKernel"
,
([
&
]
{
SparseMaskHelperCPUKernel
<
T
,
data_t
>
(
dev_ctx
,
x
,
mask_indices
,
out
);
}));
}
}
// namespace sparse
}
// namespace phi
...
...
@@ -101,3 +167,16 @@ PD_REGISTER_KERNEL(sparse_mask,
int64_t
)
{
kernel
->
InputAt
(
1
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseMaskHelperKernel as `sparse_mask_helper` for
// the CPU backend with value types float, double, uint8_t, int16_t, int and
// int64_t; input 0 is marked as SPARSE_COO.
// NOTE(review): the GPU registration of the same kernel also lists
// phi::dtype::float16, this one does not -- confirm that is intended.
PD_REGISTER_KERNEL
(
sparse_mask_helper
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseMaskHelperKernel
,
float
,
double
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
浏览文件 @
44d8c6ed
...
...
@@ -394,3 +394,15 @@ PD_REGISTER_KERNEL(csr_values,
int64_t
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseCooTensorKernel as `sparse_coo_tensor` for the
// CPU backend with value types float, double, float16, uint8_t, int16_t, int
// and int64_t. The trailing body is empty: no per-input layout is set.
PD_REGISTER_KERNEL
(
sparse_coo_tensor
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseCooTensorKernel
,
float
,
double
,
phi
::
dtype
::
float16
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{}
paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu
浏览文件 @
44d8c6ed
...
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/binary_search.h>
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/ddim.h"
...
...
@@ -20,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/sparse/common_shape.h"
#include "paddle/phi/kernels/sparse/sparse_mask_kernel.h"
#include "paddle/phi/api/ext/dispatch.h"
...
...
@@ -59,7 +62,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
const
DenseTensor
&
indices
=
mask
.
non_zero_indices
();
const
DenseTensor
&
values
=
mask
.
non_zero_elements
();
int
sparse_dim
=
indices
.
dims
().
size
();
DenseTensor
sparse_offsets
=
phi
::
Empty
(
DenseTensor
sparse_offsets
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
DenseTensorMeta
(
DataType
::
INT64
,
{
sparse_dim
},
DataLayout
::
NCHW
));
std
::
vector
<
int64_t
>
h_sparse_offsets
(
sparse_dim
);
...
...
@@ -121,6 +124,153 @@ void SparseMaskKernel(const Context& dev_ctx,
}));
}
// TODO(zhangkaihuo): Use an op to realize the function of FlattenIndices
/**
 * @brief Device wrapper around phi::funcs::sparse::FlattenIndices.
 *
 * Every thread starts at its global thread index and advances by the total
 * number of launched threads (gridDim.x * blockDim.x), so together the
 * threads cover the positions in [0, non_zero_num).
 *
 * @param indices        coordinate buffer forwarded to FlattenIndices
 * @param sparse_offsets per-dimension multipliers forwarded to FlattenIndices
 * @param non_zero_num   number of positions to flatten
 * @param sparse_dim     number of dimensions per coordinate
 * @param out            destination of the linear indices
 */
template <typename IntT>
__global__ void FlattenIndicesKernel(const IntT* indices,
                                     const IntT* sparse_offsets,
                                     const int64_t non_zero_num,
                                     const int64_t sparse_dim,
                                     IntT* out) {
  const int first = blockIdx.x * blockDim.x + threadIdx.x;
  const int step = gridDim.x * blockDim.x;
  phi::funcs::sparse::FlattenIndices<IntT>(
      indices, sparse_offsets, non_zero_num, sparse_dim, first, step, out);
}
template
<
typename
T
,
typename
IntT
>
__global__
void
SparseMaskCopyKernel
(
const
IntT
*
x_indexs
,
const
IntT
*
mask_indexs
,
const
IntT
*
bound_out
,
const
T
*
x_values
,
const
int64_t
n
,
const
int64_t
stride
,
T
*
out_values
)
{
CUDA_KERNEL_LOOP_TYPE
(
i
,
n
,
int64_t
)
{
const
IntT
j
=
bound_out
[
i
];
if
(
j
>=
0
&&
j
<
n
&&
mask_indexs
[
i
]
==
x_indexs
[
j
])
{
for
(
int
k
=
0
;
k
<
stride
;
k
++
)
{
out_values
[
i
*
stride
+
k
]
=
x_values
[
j
*
stride
+
k
];
}
}
}
}
template
<
typename
T
,
typename
IntT
>
void
SparseMaskHelperGPUKernel
(
const
GPUContext
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
mask_indices
,
DenseTensor
*
out
)
{
PADDLE_ENFORCE_EQ
(
mask_indices
.
dims
().
size
(),
2
,
phi
::
errors
::
InvalidArgument
(
"the mask_indices must be 2-D tensor"
));
const
int64_t
sparse_dim
=
x
.
non_zero_indices
().
dims
()[
0
];
auto
indices_dtype
=
paddle
::
experimental
::
CppTypeToDataType
<
IntT
>::
Type
();
std
::
vector
<
IntT
>
sparse_offsets
(
sparse_dim
);
DenseTensorMeta
x_indexs_meta
(
indices_dtype
,
{
x
.
nnz
()},
DataLayout
::
NCHW
);
DenseTensorMeta
mask_indexs_meta
(
indices_dtype
,
{
mask_indices
.
dims
()[
1
]},
DataLayout
::
NCHW
);
DenseTensorMeta
sparse_offset_meta
(
indices_dtype
,
{
sparse_dim
},
DataLayout
::
NCHW
);
DenseTensor
x_indexs
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
std
::
move
(
x_indexs_meta
));
DenseTensor
mask_indexs
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
std
::
move
(
mask_indexs_meta
));
DenseTensor
bound_out
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
std
::
move
(
mask_indexs_meta
));
DenseTensor
d_sparse_offsets
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
std
::
move
(
sparse_offset_meta
));
IntT
*
x_indexs_ptr
=
x_indexs
.
data
<
IntT
>
();
IntT
*
mask_indexs_ptr
=
mask_indexs
.
data
<
IntT
>
();
IntT
*
bound_out_ptr
=
bound_out
.
data
<
IntT
>
();
// 1. calc the offsets of per dim
phi
::
funcs
::
sparse
::
CalcOffsetsPerDim
(
x
.
dims
(),
sparse_dim
,
&
sparse_offsets
);
// 2. copy sparse_offsets to device
phi
::
backends
::
gpu
::
GpuMemcpyAsync
(
d_sparse_offsets
.
data
<
IntT
>
(),
sparse_offsets
.
data
(),
sizeof
(
IntT
)
*
sparse_dim
,
#ifdef PADDLE_WITH_HIP
hipMemcpyHostToDevice
,
#else
cudaMemcpyHostToDevice
,
#endif
dev_ctx
.
stream
());
// 3. flatten x indices and mask indices
auto
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
x_indexs
.
numel
(),
1
);
FlattenIndicesKernel
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
x
.
non_zero_indices
().
data
<
IntT
>
(),
d_sparse_offsets
.
data
<
IntT
>
(),
x_indexs
.
numel
(),
sparse_dim
,
x_indexs_ptr
);
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
mask_indexs
.
numel
(),
1
);
FlattenIndicesKernel
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
mask_indices
.
data
<
IntT
>
(),
d_sparse_offsets
.
data
<
IntT
>
(),
mask_indexs
.
numel
(),
sparse_dim
,
mask_indexs_ptr
);
// 4. call thrust::lower_bound
#ifdef PADDLE_WITH_HIP
thrust
::
lower_bound
(
thrust
::
hip
::
par
.
on
(
dev_ctx
.
stream
()),
#else
thrust
::
lower_bound
(
thrust
::
cuda
::
par
.
on
(
dev_ctx
.
stream
()),
#endif
x_indexs_ptr
,
x_indexs_ptr
+
x_indexs
.
numel
(),
mask_indexs_ptr
,
mask_indexs_ptr
+
mask_indexs
.
numel
(),
bound_out_ptr
);
// 5. copy value to out
*
out
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_elements
());
phi
::
funcs
::
SetConstant
<
GPUContext
,
T
>
set_zero
;
set_zero
(
dev_ctx
,
out
,
static_cast
<
T
>
(
0
));
T
*
out_ptr
=
out
->
data
<
T
>
();
const
int64_t
stride
=
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
dims
().
size
()
-
sparse_dim
;
SparseMaskCopyKernel
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
x_indexs_ptr
,
mask_indexs_ptr
,
bound_out_ptr
,
x
.
non_zero_elements
().
data
<
T
>
(),
mask_indexs
.
numel
(),
stride
,
out_ptr
);
}
/**
 * @brief Filter the values of x using mask_indices (GPU entry point).
 *
 * Dispatches on the dtype of x.non_zero_indices() and forwards every argument
 * unchanged to SparseMaskHelperGPUKernel<T, data_t>.
 *
 * @param dev_ctx      device context handed to the implementation
 * @param x            sparse tensor whose values are filtered
 * @param mask_indices coordinates that select the values to keep
 * @param out          receives the filtered values
 */
template
<
typename
T
,
typename
Context
>
void
SparseMaskHelperKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
mask_indices
,
DenseTensor
*
out
)
{
// `data_t` is presumably the index type selected by the dispatch macro --
// TODO confirm in paddle/phi/api/ext/dispatch.h.
PD_DISPATCH_INTEGRAL_TYPES
(
x
.
non_zero_indices
().
dtype
(),
"SparseMaskHelperGPUKernel"
,
([
&
]
{
SparseMaskHelperGPUKernel
<
T
,
data_t
>
(
dev_ctx
,
x
,
mask_indices
,
out
);
}));
}
}
// namespace sparse
}
// namespace phi
...
...
@@ -138,3 +288,17 @@ PD_REGISTER_KERNEL(sparse_mask,
int64_t
)
{
kernel
->
InputAt
(
1
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseMaskHelperKernel as `sparse_mask_helper` for
// the GPU backend with value types float, double, float16, uint8_t, int16_t,
// int and int64_t; input 0 is marked as SPARSE_COO.
PD_REGISTER_KERNEL
(
sparse_mask_helper
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseMaskHelperKernel
,
float
,
double
,
phi
::
dtype
::
float16
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
浏览文件 @
44d8c6ed
...
...
@@ -665,3 +665,15 @@ PD_REGISTER_KERNEL(csr_values,
int64_t
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseCooTensorKernel as `sparse_coo_tensor` for the
// GPU backend with value types float, double, float16, uint8_t, int16_t, int
// and int64_t. The trailing body is empty: no per-input layout is set.
PD_REGISTER_KERNEL
(
sparse_coo_tensor
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseCooTensorKernel
,
float
,
double
,
phi
::
dtype
::
float16
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{}
paddle/phi/kernels/sparse/sparse_mask_kernel.h
浏览文件 @
44d8c6ed
...
...
@@ -26,5 +26,11 @@ void SparseMaskKernel(const Context& dev_ctx,
const
SparseCooTensor
&
mask
,
SparseCooTensor
*
out
);
/**
 * @brief Declaration of the kernel that filters the values of a sparse
 *        tensor by a set of coordinates.
 *
 * @param dev_ctx      device context
 * @param x            sparse tensor whose values are filtered
 * @param mask_indices coordinates that select the values to keep
 * @param out          receives the filtered values
 */
template
<
typename
T
,
typename
Context
>
void
SparseMaskHelperKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
mask_indices
,
DenseTensor
*
out
);
}
// namespace sparse
}
// namespace phi
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
浏览文件 @
44d8c6ed
...
...
@@ -66,6 +66,19 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad,
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseCooTensorGradKernel as
// `sparse_coo_tensor_grad` for the CPU backend with value types float,
// double, uint8_t, int16_t, int and int64_t; input 1 is marked as SPARSE_COO.
PD_REGISTER_KERNEL
(
sparse_coo_tensor_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseCooTensorGradKernel
,
float
,
double
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{
kernel
->
InputAt
(
1
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL
(
coo_values_grad
,
GPU
,
...
...
@@ -95,4 +108,16 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad,
int64_t
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
// Registers phi::sparse::SparseCooTensorGradKernel as
// `sparse_coo_tensor_grad` for the GPU backend with value types float,
// double, uint8_t, int16_t, int and int64_t; input 1 is marked as SPARSE_COO.
// NOTE(review): the forward `sparse_coo_tensor` GPU kernel also lists
// phi::dtype::float16, this gradient kernel does not -- confirm that is
// intended.
PD_REGISTER_KERNEL
(
sparse_coo_tensor_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
SparseCooTensorGradKernel
,
float
,
double
,
uint8_t
,
int16_t
,
int
,
int64_t
)
{
kernel
->
InputAt
(
1
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
#endif
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h
浏览文件 @
44d8c6ed
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/kernels/sparse/sparse_mask_kernel.h"
namespace
phi
{
namespace
sparse
{
...
...
@@ -32,5 +33,13 @@ void SparseCooToDenseGradKernel(const Context& dev_ctx,
const
DenseTensor
&
out_grad
,
SparseCooTensor
*
x_grad
);
/**
 * @brief Gradient of the sparse COO tensor construction with respect to its
 *        values.
 *
 * Forwards to SparseMaskHelperKernel with `out_grad` as the sparse tensor and
 * `indices` as the selecting coordinates, so `values_grad` receives the
 * values of `out_grad` picked at `indices`.
 *
 * @param dev_ctx     device context
 * @param indices     indices passed as the mask to SparseMaskHelperKernel
 * @param out_grad    sparse gradient of the output
 * @param values_grad receives the gradient of the values
 */
template
<
typename
T
,
typename
Context
>
void
SparseCooTensorGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
indices
,
const
SparseCooTensor
&
out_grad
,
DenseTensor
*
values_grad
)
{
SparseMaskHelperKernel
<
T
,
Context
>
(
dev_ctx
,
out_grad
,
indices
,
values_grad
);
}
}
// namespace sparse
}
// namespace phi
paddle/phi/kernels/sparse/sparse_utils_kernel.h
浏览文件 @
44d8c6ed
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/phi/api/lib/utils/storage.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
...
...
@@ -147,5 +148,16 @@ void CsrValuesKernel(const Context& dev_ctx,
*
out
=
x
.
non_zero_elements
();
}
/**
 * @brief Build a SparseCooTensor from values, indices and a dense shape.
 *
 * `*out` is assigned a SparseCooTensor constructed from `indices`, `values`
 * and the DDim made from dense_shape.GetData(). `dev_ctx` is not used, and
 * the inputs are passed on as they are (see the TODO below).
 *
 * @param dev_ctx     device context (unused here)
 * @param values      values of the non-zero entries
 * @param indices     coordinates of the non-zero entries
 * @param dense_shape shape of the dense tensor being represented
 * @param out         receives the constructed sparse tensor
 */
template
<
typename
T
,
typename
Context
>
void
SparseCooTensorKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
values
,
const
DenseTensor
&
indices
,
const
IntArray
&
dense_shape
,
SparseCooTensor
*
out
)
{
*
out
=
SparseCooTensor
(
indices
,
values
,
phi
::
make_ddim
(
dense_shape
.
GetData
()));
// TODO(zhangkaihuo): sort and merge the duplicate indices
}
}
// namespace sparse
}
// namespace phi
python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
浏览文件 @
44d8c6ed
...
...
@@ -134,9 +134,11 @@ class TestSparseConvert(unittest.TestCase):
#test to_sparse_coo_grad backward
out_grad_indices
=
[[
0
,
1
],
[
0
,
1
]]
out_grad_values
=
[
2.0
,
3.0
]
out_grad
=
core
.
eager
.
sparse_coo_tensor
(
out_grad
=
paddle
.
sparse
.
sparse_coo_tensor
(
paddle
.
to_tensor
(
out_grad_indices
),
paddle
.
to_tensor
(
out_grad_values
),
out
.
shape
,
True
)
paddle
.
to_tensor
(
out_grad_values
),
shape
=
out
.
shape
,
stop_gradient
=
True
)
out
.
backward
(
out_grad
)
assert
np
.
array_equal
(
dense_x
.
grad
.
numpy
(),
out_grad
.
to_dense
().
numpy
())
...
...
@@ -145,9 +147,11 @@ class TestSparseConvert(unittest.TestCase):
with
_test_eager_guard
():
indices
=
[[
0
,
0
,
1
,
2
,
2
],
[
1
,
3
,
2
,
0
,
1
]]
values
=
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
]
sparse_x
=
core
.
eager
.
sparse_coo_tensor
(
sparse_x
=
paddle
.
sparse
.
sparse_coo_tensor
(
paddle
.
to_tensor
(
indices
),
paddle
.
to_tensor
(
values
),
[
3
,
4
],
False
)
paddle
.
to_tensor
(
values
),
shape
=
[
3
,
4
],
stop_gradient
=
False
)
dense_tensor
=
sparse_x
.
to_dense
()
#test to_dense_grad backward
out_grad
=
[[
1.0
,
2.0
,
3.0
,
4.0
],
[
5.0
,
6.0
,
7.0
,
8.0
],
...
...
@@ -158,6 +162,17 @@ class TestSparseConvert(unittest.TestCase):
assert
np
.
array_equal
(
correct_x_grad
,
sparse_x
.
grad
.
values
().
numpy
())
paddle
.
device
.
set_device
(
"cpu"
)
sparse_x_cpu
=
paddle
.
sparse
.
sparse_coo_tensor
(
paddle
.
to_tensor
(
indices
),
paddle
.
to_tensor
(
values
),
shape
=
[
3
,
4
],
stop_gradient
=
False
)
dense_tensor_cpu
=
sparse_x_cpu
.
to_dense
()
dense_tensor_cpu
.
backward
(
paddle
.
to_tensor
(
out_grad
))
assert
np
.
array_equal
(
correct_x_grad
,
sparse_x_cpu
.
grad
.
values
().
numpy
())
def
test_to_sparse_csr
(
self
):
with
_test_eager_guard
():
x
=
[[
0
,
1
,
0
,
2
],
[
0
,
0
,
3
,
0
],
[
4
,
5
,
0
,
0
]]
...
...
@@ -177,15 +192,52 @@ class TestSparseConvert(unittest.TestCase):
with
_test_eager_guard
():
indices
=
[[
0
,
0
,
1
,
2
,
2
],
[
1
,
3
,
2
,
0
,
1
]]
values
=
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
]
sparse_x
=
core
.
eager
.
sparse_coo_tensor
(
sparse_x
=
paddle
.
sparse
.
sparse_coo_tensor
(
paddle
.
to_tensor
(
indices
),
paddle
.
to_tensor
(
values
),
[
3
,
4
],
False
)
paddle
.
to_tensor
(
values
),
shape
=
[
3
,
4
],
stop_gradient
=
False
)
values_tensor
=
sparse_x
.
values
()
out_grad
=
[
2.0
,
3.0
,
5.0
,
8.0
,
9.0
]
# test coo_values_grad
values_tensor
.
backward
(
paddle
.
to_tensor
(
out_grad
))
assert
np
.
array_equal
(
out_grad
,
sparse_x
.
grad
.
values
().
numpy
())
def test_sparse_coo_tensor_grad(self):
    """Backward through sparse_coo_tensor must fill the gradient of `values`.

    The forward tensor has entries at (0, 0) and (1, 1); the incoming
    gradient has entries at (0, 1) and (1, 1) with values 2 and 3. Only
    (1, 1) is shared, so the expected gradient of `values` is [0, 3]. The
    check is done once with the default place and once with an explicit
    CPUPlace.
    """
    with _test_eager_guard():
        fwd_indices = paddle.to_tensor([[0, 1], [0, 1]], dtype='int32')
        fwd_values = paddle.to_tensor(
            [1, 2], dtype='float32', stop_gradient=False)
        sparse_x = paddle.sparse.sparse_coo_tensor(
            fwd_indices, fwd_values, shape=[2, 2], stop_gradient=False)

        grad_indices = paddle.to_tensor([[0, 1], [1, 1]], dtype='int32')
        grad_values = paddle.to_tensor([2, 3], dtype='float32')
        sparse_out_grad = paddle.sparse.sparse_coo_tensor(
            grad_indices, grad_values, shape=[2, 2])

        sparse_x.backward(sparse_out_grad)
        expected_values_grad = [0, 3]
        assert np.array_equal(expected_values_grad, fwd_values.grad.numpy())

        # Same scenario again, with every tensor created on the CPU place.
        cpu = core.CPUPlace()
        indices_cpu = paddle.to_tensor(fwd_indices, dtype='int32', place=cpu)
        values_cpu = paddle.to_tensor(
            fwd_values, dtype='float32', place=cpu, stop_gradient=False)
        sparse_x_cpu = paddle.sparse.sparse_coo_tensor(
            indices_cpu,
            values_cpu,
            shape=[2, 2],
            place=cpu,
            stop_gradient=False)

        sparse_out_grad_cpu = paddle.sparse.sparse_coo_tensor(
            grad_indices, grad_values, shape=[2, 2], place=cpu)
        sparse_x_cpu.backward(sparse_out_grad_cpu)
        assert np.array_equal(expected_values_grad, values_cpu.grad.numpy())
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/sparse/creation.py
浏览文件 @
44d8c6ed
...
...
@@ -14,6 +14,7 @@
from
paddle
import
_C_ops
from
..framework
import
core
,
dygraph_only
from
..framework
import
_current_expected_place
,
_get_paddle_place
from
..tensor
import
to_tensor
from
..tensor
import
max
from
..fluid.data_feeder
import
check_variable_and_dtype
,
check_type
,
check_dtype
,
convert_dtype
...
...
@@ -38,6 +39,18 @@ def _infer_dense_shape(indices):
return
list
(
lens
.
numpy
())
def _get_place(place):
    """Normalize `place` and fall back to the current expected place.

    `place` is first passed through `_get_paddle_place`. If the result is
    None, `_current_expected_place()` is returned. Otherwise the result must
    be an instance of core.Place, core.CPUPlace, core.CUDAPinnedPlace or
    core.CUDAPlace.

    Raises:
        ValueError: if the normalized place is none of the accepted types.
    """
    resolved = _get_paddle_place(place)
    if resolved is None:
        return _current_expected_place()

    accepted_types = (core.Place, core.CPUPlace, core.CUDAPinnedPlace,
                      core.CUDAPlace)
    if isinstance(resolved, accepted_types):
        return resolved

    raise ValueError(
        "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace"
    )
@
dygraph_only
def
sparse_coo_tensor
(
indices
,
values
,
...
...
@@ -94,6 +107,8 @@ def sparse_coo_tensor(indices,
# values=[1., 2., 3.])
"""
place
=
_get_place
(
place
)
if
not
isinstance
(
indices
,
core
.
eager
.
Tensor
):
indices
=
to_tensor
(
indices
,
dtype
=
None
,
place
=
place
,
stop_gradient
=
True
)
...
...
@@ -101,13 +116,20 @@ def sparse_coo_tensor(indices,
values
=
to_tensor
(
values
,
dtype
,
place
,
stop_gradient
)
if
len
(
indices
.
shape
)
!=
2
:
raise
ValueError
(
"'indices' must be 2-D."
)
if
place
is
not
None
:
if
not
indices
.
place
.
_equals
(
place
):
indices
=
indices
.
_copy_to
(
place
,
False
)
if
not
values
.
place
.
_equals
(
place
):
values
=
values
.
_copy_to
(
place
,
False
)
values
=
_handle_dtype
(
values
,
dtype
)
values
.
stop_gradient
=
stop_gradient
if
shape
is
None
:
shape
=
_infer_dense_shape
(
indices
)
return
core
.
eager
.
sparse_coo_tensor
(
indices
,
values
,
shape
,
stop_gradient
)
return
_C_ops
.
final_state_sparse_create_sparse_coo_tensor
(
values
,
indices
,
shape
)
#TODO: need to support shape is None
...
...
@@ -171,6 +193,9 @@ def sparse_csr_tensor(crows,
# cols=[1, 3, 2, 0, 1],
# values=[1, 2, 3, 4, 5])
"""
place
=
_get_place
(
place
)
if
not
isinstance
(
crows
,
core
.
eager
.
Tensor
):
crows
=
to_tensor
(
crows
,
dtype
=
None
,
place
=
place
,
stop_gradient
=
True
)
if
not
isinstance
(
cols
,
core
.
eager
.
Tensor
):
...
...
@@ -182,10 +207,15 @@ def sparse_csr_tensor(crows,
"SparseCsrTensor only support 2-D or 3-D matrix. The 'crows', 'cols' and 'values' must be 1-D."
)
if
place
is
not
None
:
if
not
crows
.
place
.
_equals
(
place
)
:
crows
=
crows
.
_copy_to
(
place
,
False
)
if
not
cols
.
place
.
_equals
(
place
):
cols
=
cols
.
_copy_to
(
place
,
False
)
if
not
values
.
place
.
_equals
(
place
):
values
=
values
.
_copy_to
(
place
,
False
)
values
=
_handle_dtype
(
values
,
dtype
)
values
.
stop_gradient
=
stop_gradient
return
core
.
eager
.
sparse_csr_tensor
(
crows
,
cols
,
values
,
shape
,
stop_gradient
)
python/paddle/utils/code_gen/sparse_api.yaml
浏览文件 @
44d8c6ed
...
...
@@ -21,6 +21,14 @@
layout
:
x
backward
:
coo_values_grad
-
api
:
create_sparse_coo_tensor
args
:
(Tensor values, Tensor indices, IntArray dense_shape)
output
:
Tensor(out@SparseCooTensor)
kernel
:
func
:
sparse_coo_tensor
layout
:
values
backward
:
create_sparse_coo_tensor_grad
-
api
:
csr_values
args
:
(Tensor x)
output
:
Tensor(out@DenseTensor)
...
...
python/paddle/utils/code_gen/sparse_bw_api.yaml
浏览文件 @
44d8c6ed
...
...
@@ -19,6 +19,13 @@
kernel
:
func
:
coo_values_grad
-
backward_api
:
create_sparse_coo_tensor_grad
forward
:
create_sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out@SparseCooTensor)
args
:
(Tensor indices, Tensor out_grad)
output
:
Tensor(values_grad@DenseTensor)
kernel
:
func
:
sparse_coo_tensor_grad
-
backward_api
:
dense_to_coo_grad
forward
:
dense_to_coo(Tensor x, int64_t sparse_dim) -> Tensor(out@SparseCooTensor)
args
:
(Tensor out_grad)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录