Commit acd08a9b (unverified)
Authored by zhangkaihuo on Apr 13, 2022; committed via GitHub on Apr 13, 2022.

Add kernel sparse_mask_helper; sparse_coo_tensor_grad (#41586)
Parent: d84934da

Showing 13 changed files with 476 additions and 21 deletions (+476, −21).
Changed files:

  paddle/phi/kernels/funcs/sparse/common_shape.h                +39   −0
  paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc           +90   −11
  paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc          +12   −0
  paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu           +165  −1
  paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu          +12   −0
  paddle/phi/kernels/sparse/sparse_mask_kernel.h                +6    −0
  paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc         +25   −0
  paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h          +9    −0
  paddle/phi/kernels/sparse/sparse_utils_kernel.h               +12   −0
  python/paddle/fluid/tests/unittests/test_sparse_utils_op.py   +58   −6
  python/paddle/sparse/creation.py                              +33   −3
  python/paddle/utils/code_gen/sparse_api.yaml                  +8    −0
  python/paddle/utils/code_gen/sparse_bw_api.yaml               +7    −0
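In user-facing terms, this commit routes `paddle.sparse.sparse_coo_tensor` through the new `sparse_coo_tensor` phi kernel and gives it a backward (`sparse_coo_tensor_grad`), so gradients can flow back to the dense `values` tensor. A minimal usage sketch mirroring the new `test_sparse_coo_tensor_grad` unit test added below (the eager-guard import reflects the Paddle version at this commit and may differ elsewhere):

import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():  # eager (dygraph) mode, as in the unit tests of this commit
    indices = paddle.to_tensor([[0, 1], [0, 1]], dtype='int32')
    values = paddle.to_tensor([1, 2], dtype='float32', stop_gradient=False)
    sparse_x = paddle.sparse.sparse_coo_tensor(
        indices, values, shape=[2, 2], stop_gradient=False)

    # Backward: only entries whose indices also appear in the gradient receive a value.
    grad_indices = paddle.to_tensor([[0, 1], [1, 1]], dtype='int32')
    grad_values = paddle.to_tensor([2, 3], dtype='float32')
    sparse_out_grad = paddle.sparse.sparse_coo_tensor(
        grad_indices, grad_values, shape=[2, 2])
    sparse_x.backward(sparse_out_grad)

    # (0, 0) is not among the gradient's indices, so its slot stays 0.
    assert np.array_equal(values.grad.numpy(), [0, 3])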
paddle/phi/kernels/funcs/sparse/common_shape.h
@@ -40,6 +40,45 @@ inline const DDim InferDenseDims(const DDim& x_dims,
   return values_dims;
 }

+template <typename IntT>
+inline const IntT HOSTDEVICE IndicesToIndex(const IntT* indices,
+                                            const IntT* sparse_offsets,
+                                            const int64_t non_zero_num,
+                                            const int64_t sparse_dim,
+                                            const int i) {
+  IntT index = 0;
+  for (IntT j = 0; j < sparse_dim; j++) {
+    index += indices[j * non_zero_num + i] * sparse_offsets[j];
+  }
+  return index;
+}
+
+template <typename IntT>
+inline void HOSTDEVICE FlattenIndices(const IntT* indices,
+                                      const IntT* sparse_offsets,
+                                      const int64_t non_zero_num,
+                                      const int64_t sparse_dim,
+                                      const int start,
+                                      const int stride,
+                                      IntT* out) {
+  for (int i = start; i < non_zero_num; i += stride) {
+    out[i] = IndicesToIndex(indices, sparse_offsets, non_zero_num, sparse_dim, i);
+  }
+}
+
+// 1. indices.dims().size() == 2
+template <typename IntT>
+inline void CalcOffsetsPerDim(const DDim& dims,
+                              const int64_t sparse_dim,
+                              std::vector<IntT>* offsets) {
+  IntT offset = 1;
+  for (IntT i = sparse_dim - 1; i >= 0; i--) {
+    (*offsets)[i] = offset;
+    offset *= dims[i];
+  }
+}
+
 }  // namespace sparse
 }  // namespace funcs
 }  // namespace phi
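These helpers are the core of the new masking path: `CalcOffsetsPerDim` computes row-major strides for the leading `sparse_dim` dimensions, and `IndicesToIndex`/`FlattenIndices` collapse each column of a 2-D COO indices tensor into a single linear index. A small NumPy sketch of the same arithmetic (illustrative only, not part of the commit):

import numpy as np

def calc_offsets_per_dim(dims, sparse_dim):
    # Row-major strides over the leading sparse_dim dimensions.
    offsets = [0] * sparse_dim
    offset = 1
    for i in range(sparse_dim - 1, -1, -1):
        offsets[i] = offset
        offset *= dims[i]
    return offsets

def flatten_indices(indices, offsets):
    # indices has shape [sparse_dim, nnz]; one linear index per column.
    return (np.asarray(indices) * np.asarray(offsets)[:, None]).sum(axis=0)

dims, sparse_dim = [3, 4], 2
offsets = calc_offsets_per_dim(dims, sparse_dim)                # [4, 1]
print(flatten_indices([[0, 0, 1, 2], [1, 3, 2, 0]], offsets))   # [1 3 6 8]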
paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/sparse/common_shape.h"
 #include "paddle/phi/api/ext/dispatch.h"
@@ -38,12 +39,6 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
   const DenseTensor& indices = mask.non_zero_indices();
   const DenseTensor& values = mask.non_zero_elements();
   int sparse_dim = indices.dims().size();
-  std::vector<int64_t> sparse_offsets(sparse_dim);
-  int64_t offset = 1;
-  for (int i = sparse_dim - 1; i >= 0; i--) {
-    sparse_offsets[i] = offset;
-    offset *= dims[i];
-  }
   DenseTensor out_indices = phi::EmptyLike<T>(dev_ctx, indices);
   DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, values);
@@ -51,21 +46,25 @@ void SparseMaskCPUKernel(const CPUContext& dev_ctx,
   // the out_indices is same as indices of mask
   phi::Copy(dev_ctx, indices, dev_ctx.GetPlace(), false, &out_indices);
-  const IntT* indices_ptr = indices.data<IntT>();
   T* out_values_ptr = out_values.data<T>();
   const T* x_ptr = x.data<T>();
   const int64_t non_zero_num = mask.nnz();
   auto dims_2d = flatten_to_2d(dims, sparse_dim);
   const int cols = dims_2d[1];
+  const IntT* indices_ptr = indices.data<IntT>();
+  std::vector<IntT> out_indexs(non_zero_num), sparse_offsets(sparse_dim);
+  phi::funcs::sparse::CalcOffsetsPerDim<IntT>(dims, sparse_dim, &sparse_offsets);
   for (int64_t i = 0; i < non_zero_num; i++) {
-    int64_t index = 0;
-    for (int j = 0; j < sparse_dim; j++) {
-      index += indices_ptr[j * non_zero_num + i] * sparse_offsets[j];
-    }
+    int64_t index = phi::funcs::sparse::IndicesToIndex<IntT>(
+        indices_ptr, sparse_offsets.data(), non_zero_num, sparse_dim, i);
     memcpy(out_values_ptr + i * cols, x_ptr + index * cols, cols * sizeof(T));
   }
   out->SetMember(out_indices, out_values, dims, true);
 }
@@ -85,6 +84,73 @@ void SparseMaskKernel(const Context& dev_ctx,
       }));
 }

+template <typename T, typename IntT>
+void SparseMaskHelperCPUKernel(const CPUContext& dev_ctx,
+                               const SparseCooTensor& x,
+                               const DenseTensor& mask_indices,
+                               DenseTensor* out) {
+  PADDLE_ENFORCE_EQ(
+      mask_indices.dims().size(),
+      2,
+      phi::errors::InvalidArgument("the mask_indices must be 2-D tensor"));
+
+  const int64_t sparse_dim = x.non_zero_indices().dims()[0];
+
+  std::vector<IntT> sparse_offsets(sparse_dim), x_indexs(x.nnz()),
+      mask_indexs(mask_indices.dims()[1]);
+  phi::funcs::sparse::CalcOffsetsPerDim<IntT>(
+      x.dims(), sparse_dim, &sparse_offsets);
+
+  phi::funcs::sparse::FlattenIndices(x.non_zero_indices().data<IntT>(),
+                                     sparse_offsets.data(),
+                                     x.nnz(),
+                                     sparse_dim,
+                                     0,
+                                     1,
+                                     x_indexs.data());
+
+  phi::funcs::sparse::FlattenIndices(mask_indices.data<IntT>(),
+                                     sparse_offsets.data(),
+                                     x.nnz(),
+                                     sparse_dim,
+                                     0,
+                                     1,
+                                     mask_indexs.data());
+
+  std::unordered_map<IntT, uint64_t> x_indexs_map;
+  for (uint64_t i = 0; i < x_indexs.size(); i++) {
+    x_indexs_map[x_indexs[i]] = i;
+  }
+  *out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  T* out_ptr = out->data<T>();
+  memset(out_ptr, static_cast<T>(0), out->numel() * sizeof(T));
+  const int64_t stride =
+      x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim;
+  const T* in_ptr = x.non_zero_elements().data<T>();
+  // TODO(zhangkaihuo): multithreading can be used for acceleration
+  for (uint64_t i = 0; i < mask_indexs.size(); i++) {
+    auto iter = x_indexs_map.find(mask_indexs[i]);
+    if (iter != x_indexs_map.end()) {
+      memcpy(out_ptr + i * stride,
+             in_ptr + iter->second * stride,
+             stride * sizeof(T));
+    }
+  }
+}
+
+/**
+ * @brief filter values from x.values() using mask_indices
+ */
+template <typename T, typename Context>
+void SparseMaskHelperKernel(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& mask_indices,
+                            DenseTensor* out) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] {
+        SparseMaskHelperCPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
+      }));
+}
+
 }  // namespace sparse
 }  // namespace phi
@@ -101,3 +167,16 @@ PD_REGISTER_KERNEL(sparse_mask,
                    int64_t) {
   kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_mask_helper,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseMaskHelperKernel,
+                   float,
+                   double,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
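`SparseMaskHelperCPUKernel` is essentially a keyed gather: the input's and the mask's COO indices are both flattened to linear keys, the input keys go into a hash map, and for each mask key the matching value row is copied (keys with no match yield zero rows). A NumPy sketch of the same idea (illustrative only; the helper name and signature here are mine, not Paddle's):

import numpy as np

def sparse_mask_helper(x_indices, x_values, mask_indices, dims, sparse_dim):
    # Flatten each indices column to a single integer key (row-major strides).
    offsets = np.array([int(np.prod(dims[i + 1:sparse_dim])) for i in range(sparse_dim)])
    x_keys = (np.asarray(x_indices) * offsets[:, None]).sum(axis=0)
    mask_keys = (np.asarray(mask_indices) * offsets[:, None]).sum(axis=0)

    key_to_pos = {k: i for i, k in enumerate(x_keys)}        # hash map over x keys
    out = np.zeros((len(mask_keys),) + np.asarray(x_values).shape[1:], dtype=float)
    for i, k in enumerate(mask_keys):
        if k in key_to_pos:                                  # copy the matching value row
            out[i] = np.asarray(x_values)[key_to_pos[k]]
    return out

# x has values at (0, 0) and (1, 1); the mask asks for (0, 1) and (1, 1).
print(sparse_mask_helper([[0, 1], [0, 1]], [1.0, 2.0],
                         [[0, 1], [1, 1]], [2, 2], 2))       # [0. 2.]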
paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc
@@ -394,3 +394,15 @@ PD_REGISTER_KERNEL(csr_values,
                    int64_t) {
   kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_coo_tensor,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooTensorKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {}
paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <thrust/binary_search.h>
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/ddim.h"
@@ -20,6 +22,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/sparse/common_shape.h"
 #include "paddle/phi/kernels/sparse/sparse_mask_kernel.h"
 #include "paddle/phi/api/ext/dispatch.h"
@@ -59,7 +62,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
   const DenseTensor& indices = mask.non_zero_indices();
   const DenseTensor& values = mask.non_zero_elements();
   int sparse_dim = indices.dims().size();
-  DenseTensor sparse_offsets = phi::Empty(
+  DenseTensor sparse_offsets = phi::Empty<GPUContext>(
       dev_ctx,
       DenseTensorMeta(DataType::INT64, {sparse_dim}, DataLayout::NCHW));
   std::vector<int64_t> h_sparse_offsets(sparse_dim);
@@ -121,6 +124,153 @@ void SparseMaskKernel(const Context& dev_ctx,
       }));
 }

+// TODO(zhangkaihuo): Use an op to realize the function of FlattenIndices
+template <typename IntT>
+__global__ void FlattenIndicesKernel(const IntT* indices,
+                                     const IntT* sparse_offsets,
+                                     const int64_t non_zero_num,
+                                     const int64_t sparse_dim,
+                                     IntT* out) {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  phi::funcs::sparse::FlattenIndices<IntT>(indices,
+                                           sparse_offsets,
+                                           non_zero_num,
+                                           sparse_dim,
+                                           tid,
+                                           gridDim.x * blockDim.x,
+                                           out);
+}
+
+template <typename T, typename IntT>
+__global__ void SparseMaskCopyKernel(const IntT* x_indexs,
+                                     const IntT* mask_indexs,
+                                     const IntT* bound_out,
+                                     const T* x_values,
+                                     const int64_t n,
+                                     const int64_t stride,
+                                     T* out_values) {
+  CUDA_KERNEL_LOOP_TYPE(i, n, int64_t) {
+    const IntT j = bound_out[i];
+    if (j >= 0 && j < n && mask_indexs[i] == x_indexs[j]) {
+      for (int k = 0; k < stride; k++) {
+        out_values[i * stride + k] = x_values[j * stride + k];
+      }
+    }
+  }
+}
+
+template <typename T, typename IntT>
+void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
+                               const SparseCooTensor& x,
+                               const DenseTensor& mask_indices,
+                               DenseTensor* out) {
+  PADDLE_ENFORCE_EQ(
+      mask_indices.dims().size(),
+      2,
+      phi::errors::InvalidArgument("the mask_indices must be 2-D tensor"));
+
+  const int64_t sparse_dim = x.non_zero_indices().dims()[0];
+  auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
+
+  std::vector<IntT> sparse_offsets(sparse_dim);
+
+  DenseTensorMeta x_indexs_meta(indices_dtype, {x.nnz()}, DataLayout::NCHW);
+  DenseTensorMeta mask_indexs_meta(
+      indices_dtype, {mask_indices.dims()[1]}, DataLayout::NCHW);
+  DenseTensorMeta sparse_offset_meta(
+      indices_dtype, {sparse_dim}, DataLayout::NCHW);
+
+  DenseTensor x_indexs =
+      phi::Empty<GPUContext>(dev_ctx, std::move(x_indexs_meta));
+  DenseTensor mask_indexs =
+      phi::Empty<GPUContext>(dev_ctx, std::move(mask_indexs_meta));
+  DenseTensor bound_out =
+      phi::Empty<GPUContext>(dev_ctx, std::move(mask_indexs_meta));
+  DenseTensor d_sparse_offsets =
+      phi::Empty<GPUContext>(dev_ctx, std::move(sparse_offset_meta));
+  IntT* x_indexs_ptr = x_indexs.data<IntT>();
+  IntT* mask_indexs_ptr = mask_indexs.data<IntT>();
+  IntT* bound_out_ptr = bound_out.data<IntT>();
+
+  // 1. calc the offsets of per dim
+  phi::funcs::sparse::CalcOffsetsPerDim(x.dims(), sparse_dim, &sparse_offsets);
+  // 2. copy sparse_offsets to device
+  phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data<IntT>(),
+                                     sparse_offsets.data(),
+                                     sizeof(IntT) * sparse_dim,
+#ifdef PADDLE_WITH_HIP
+                                     hipMemcpyHostToDevice,
+#else
+                                     cudaMemcpyHostToDevice,
+#endif
+                                     dev_ctx.stream());
+
+  // 3. flatten x indices and mask indices
+  auto config =
+      phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, x_indexs.numel(), 1);
+  FlattenIndicesKernel<<<config.block_per_grid,
+                         config.thread_per_block,
+                         0,
+                         dev_ctx.stream()>>>(x.non_zero_indices().data<IntT>(),
+                                             d_sparse_offsets.data<IntT>(),
+                                             x_indexs.numel(),
+                                             sparse_dim,
+                                             x_indexs_ptr);
+
+  config =
+      phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, mask_indexs.numel(), 1);
+  FlattenIndicesKernel<<<config.block_per_grid,
+                         config.thread_per_block,
+                         0,
+                         dev_ctx.stream()>>>(mask_indices.data<IntT>(),
+                                             d_sparse_offsets.data<IntT>(),
+                                             mask_indexs.numel(),
+                                             sparse_dim,
+                                             mask_indexs_ptr);
+
+// 4. call thrust::lower_bound
+#ifdef PADDLE_WITH_HIP
+  thrust::lower_bound(thrust::hip::par.on(dev_ctx.stream()),
+#else
+  thrust::lower_bound(thrust::cuda::par.on(dev_ctx.stream()),
+#endif
+                      x_indexs_ptr,
+                      x_indexs_ptr + x_indexs.numel(),
+                      mask_indexs_ptr,
+                      mask_indexs_ptr + mask_indexs.numel(),
+                      bound_out_ptr);
+
+  // 5. copy value to out
+  *out = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  phi::funcs::SetConstant<GPUContext, T> set_zero;
+  set_zero(dev_ctx, out, static_cast<T>(0));
+  T* out_ptr = out->data<T>();
+
+  const int64_t stride =
+      x.dims().size() == sparse_dim ? 1 : x.dims().size() - sparse_dim;
+
+  SparseMaskCopyKernel<<<config.block_per_grid,
+                         config.thread_per_block,
+                         0,
+                         dev_ctx.stream()>>>(x_indexs_ptr,
+                                             mask_indexs_ptr,
+                                             bound_out_ptr,
+                                             x.non_zero_elements().data<T>(),
+                                             mask_indexs.numel(),
+                                             stride,
+                                             out_ptr);
+}
+
+template <typename T, typename Context>
+void SparseMaskHelperKernel(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& mask_indices,
+                            DenseTensor* out) {
+  PD_DISPATCH_INTEGRAL_TYPES(
+      x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
+        SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
+      }));
+}
+
 }  // namespace sparse
 }  // namespace phi
@@ -138,3 +288,17 @@ PD_REGISTER_KERNEL(sparse_mask,
                    int64_t) {
   kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_mask_helper,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseMaskHelperKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {
+  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
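On the GPU the hash map is replaced by a binary search: `FlattenIndicesKernel` builds both key arrays, `thrust::lower_bound` finds, for each mask key, its insertion position among the x keys, and `SparseMaskCopyKernel` copies a value row only when the key at that position is an exact match. A NumPy sketch of the matching step (illustrative only; it assumes the x keys are sorted, which is what lower_bound requires):

import numpy as np

x_keys = np.array([0, 3])          # flattened indices of x (sorted)
mask_keys = np.array([1, 3])       # flattened indices of the mask
x_values = np.array([1.0, 2.0])

bound = np.searchsorted(x_keys, mask_keys)   # analogue of thrust::lower_bound
out = np.zeros(len(mask_keys))
for i, j in enumerate(bound):
    # Copy only when lower_bound landed on an exact match.
    if j < len(x_keys) and x_keys[j] == mask_keys[i]:
        out[i] = x_values[j]
print(out)  # [0. 2.]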
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
@@ -665,3 +665,15 @@ PD_REGISTER_KERNEL(csr_values,
                    int64_t) {
   kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_coo_tensor,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooTensorKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {}
paddle/phi/kernels/sparse/sparse_mask_kernel.h
@@ -26,5 +26,11 @@ void SparseMaskKernel(const Context& dev_ctx,
                       const SparseCooTensor& mask,
                       SparseCooTensor* out);

+template <typename T, typename Context>
+void SparseMaskHelperKernel(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& mask_indices,
+                            DenseTensor* out);
+
 }  // namespace sparse
 }  // namespace phi
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
@@ -66,6 +66,19 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad,
   kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_coo_tensor_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooTensorGradKernel,
+                   float,
+                   double,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {
+  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PD_REGISTER_KERNEL(coo_values_grad,
                    GPU,
@@ -95,4 +108,16 @@ PD_REGISTER_KERNEL(sparse_coo_to_dense_grad,
                    int64_t) {
   kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
 }
+
+PD_REGISTER_KERNEL(sparse_coo_tensor_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SparseCooTensorGradKernel,
+                   float,
+                   double,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t) {
+  kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
+}
 #endif
paddle/phi/kernels/sparse/sparse_utils_grad_kernel.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
+#include "paddle/phi/kernels/sparse/sparse_mask_kernel.h"

 namespace phi {
 namespace sparse {
@@ -32,5 +33,13 @@ void SparseCooToDenseGradKernel(const Context& dev_ctx,
                                 const DenseTensor& out_grad,
                                 SparseCooTensor* x_grad);

+template <typename T, typename Context>
+void SparseCooTensorGradKernel(const Context& dev_ctx,
+                               const DenseTensor& indices,
+                               const SparseCooTensor& out_grad,
+                               DenseTensor* values_grad) {
+  SparseMaskHelperKernel<T, Context>(dev_ctx, out_grad, indices, values_grad);
+}
+
 }  // namespace sparse
 }  // namespace phi
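So the backward of `sparse_coo_tensor` with respect to `values` is exactly the mask-helper gather: the incoming `out_grad` values are looked up at the positions of the forward `indices`, and forward entries absent from `out_grad` get a zero gradient. A hypothetical dense-side check of this with the numbers used in the new unit test below (forward indices [[0, 1], [0, 1]], gradient indices [[0, 1], [1, 1]] with values [2, 3]):

import numpy as np

forward_keys = np.array([0, 3])                     # flattened (0, 0) and (1, 1) in a 2x2 shape
grad_keys, grad_vals = np.array([1, 3]), np.array([2.0, 3.0])
values_grad = np.array([grad_vals[list(grad_keys).index(k)] if k in grad_keys else 0.0
                        for k in forward_keys])
print(values_grad)  # [0. 3.]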
paddle/phi/kernels/sparse/sparse_utils_kernel.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/phi/api/lib/utils/storage.h"
+#include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
@@ -147,5 +148,16 @@ void CsrValuesKernel(const Context& dev_ctx,
   *out = x.non_zero_elements();
 }

+template <typename T, typename Context>
+void SparseCooTensorKernel(const Context& dev_ctx,
+                           const DenseTensor& values,
+                           const DenseTensor& indices,
+                           const IntArray& dense_shape,
+                           SparseCooTensor* out) {
+  *out =
+      SparseCooTensor(indices, values, phi::make_ddim(dense_shape.GetData()));
+  // TODO(zhangkaihuo): sort and merge the dumplicate indices
+}
+
 }  // namespace sparse
 }  // namespace phi
python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
@@ -134,9 +134,11 @@ class TestSparseConvert(unittest.TestCase):
             #test to_sparse_coo_grad backward
             out_grad_indices = [[0, 1], [0, 1]]
             out_grad_values = [2.0, 3.0]
-            out_grad = core.eager.sparse_coo_tensor(
+            out_grad = paddle.sparse.sparse_coo_tensor(
                 paddle.to_tensor(out_grad_indices),
-                paddle.to_tensor(out_grad_values), out.shape, True)
+                paddle.to_tensor(out_grad_values),
+                shape=out.shape,
+                stop_gradient=True)
             out.backward(out_grad)
             assert np.array_equal(dense_x.grad.numpy(),
                                   out_grad.to_dense().numpy())
@@ -145,9 +147,11 @@ class TestSparseConvert(unittest.TestCase):
         with _test_eager_guard():
             indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
             values = [1.0, 2.0, 3.0, 4.0, 5.0]
-            sparse_x = core.eager.sparse_coo_tensor(
+            sparse_x = paddle.sparse.sparse_coo_tensor(
                 paddle.to_tensor(indices),
-                paddle.to_tensor(values), [3, 4], False)
+                paddle.to_tensor(values),
+                shape=[3, 4],
+                stop_gradient=False)
             dense_tensor = sparse_x.to_dense()
             #test to_dense_grad backward
             out_grad = [[1.0, 2.0, 3.0, 4.0],
                         [5.0, 6.0, 7.0, 8.0],
@@ -158,6 +162,17 @@ class TestSparseConvert(unittest.TestCase):
             assert np.array_equal(correct_x_grad,
                                   sparse_x.grad.values().numpy())

+            paddle.device.set_device("cpu")
+            sparse_x_cpu = paddle.sparse.sparse_coo_tensor(
+                paddle.to_tensor(indices),
+                paddle.to_tensor(values),
+                shape=[3, 4],
+                stop_gradient=False)
+            dense_tensor_cpu = sparse_x_cpu.to_dense()
+            dense_tensor_cpu.backward(paddle.to_tensor(out_grad))
+            assert np.array_equal(correct_x_grad,
+                                  sparse_x_cpu.grad.values().numpy())
+
     def test_to_sparse_csr(self):
         with _test_eager_guard():
             x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]]
@@ -177,15 +192,52 @@ class TestSparseConvert(unittest.TestCase):
         with _test_eager_guard():
             indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
             values = [1.0, 2.0, 3.0, 4.0, 5.0]
-            sparse_x = core.eager.sparse_coo_tensor(
+            sparse_x = paddle.sparse.sparse_coo_tensor(
                 paddle.to_tensor(indices),
-                paddle.to_tensor(values), [3, 4], False)
+                paddle.to_tensor(values),
+                shape=[3, 4],
+                stop_gradient=False)
             values_tensor = sparse_x.values()
             out_grad = [2.0, 3.0, 5.0, 8.0, 9.0]
             # test coo_values_grad
             values_tensor.backward(paddle.to_tensor(out_grad))
             assert np.array_equal(out_grad,
                                   sparse_x.grad.values().numpy())

+    def test_sparse_coo_tensor_grad(self):
+        with _test_eager_guard():
+            indices = [[0, 1], [0, 1]]
+            values = [1, 2]
+            indices = paddle.to_tensor(indices, dtype='int32')
+            values = paddle.to_tensor(
+                values, dtype='float32', stop_gradient=False)
+            sparse_x = paddle.sparse.sparse_coo_tensor(
+                indices, values, shape=[2, 2], stop_gradient=False)
+            grad_indices = [[0, 1], [1, 1]]
+            grad_values = [2, 3]
+            grad_indices = paddle.to_tensor(grad_indices, dtype='int32')
+            grad_values = paddle.to_tensor(grad_values, dtype='float32')
+            sparse_out_grad = paddle.sparse.sparse_coo_tensor(
+                grad_indices, grad_values, shape=[2, 2])
+            sparse_x.backward(sparse_out_grad)
+            correct_values_grad = [0, 3]
+            assert np.array_equal(correct_values_grad, values.grad.numpy())
+
+            place = core.CPUPlace()
+            indices_cpu = paddle.to_tensor(indices, dtype='int32', place=place)
+            values_cpu = paddle.to_tensor(
+                values, dtype='float32', place=place, stop_gradient=False)
+            sparse_x_cpu = paddle.sparse.sparse_coo_tensor(
+                indices_cpu,
+                values_cpu,
+                shape=[2, 2],
+                place=place,
+                stop_gradient=False)
+            sparse_out_grad_cpu = paddle.sparse.sparse_coo_tensor(
+                grad_indices, grad_values, shape=[2, 2], place=place)
+            sparse_x_cpu.backward(sparse_out_grad_cpu)
+            assert np.array_equal(correct_values_grad, values_cpu.grad.numpy())
+
 if __name__ == "__main__":
     unittest.main()
python/paddle/sparse/creation.py
@@ -14,6 +14,7 @@
 from paddle import _C_ops
 from ..framework import core, dygraph_only
+from ..framework import _current_expected_place, _get_paddle_place
 from ..tensor import to_tensor
 from ..tensor import max
 from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
@@ -38,6 +39,18 @@ def _infer_dense_shape(indices):
     return list(lens.numpy())

+def _get_place(place):
+    place = _get_paddle_place(place)
+    if place is None:
+        place = _current_expected_place()
+    elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace,
+                                core.CUDAPlace)):
+        raise ValueError(
+            "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace"
+        )
+    return place
+
 @dygraph_only
 def sparse_coo_tensor(indices,
                       values,
@@ -94,6 +107,8 @@ def sparse_coo_tensor(indices,
         #     values=[1., 2., 3.])
     """
+    place = _get_place(place)
+
     if not isinstance(indices, core.eager.Tensor):
         indices = to_tensor(indices, dtype=None, place=place, stop_gradient=True)
@@ -101,13 +116,20 @@ def sparse_coo_tensor(indices,
         values = to_tensor(values, dtype, place, stop_gradient)
     if len(indices.shape) != 2:
         raise ValueError("'indices' must be 2-D.")
     if place is not None:
         if not indices.place._equals(place):
             indices = indices._copy_to(place, False)
         if not values.place._equals(place):
             values = values._copy_to(place, False)
         values = _handle_dtype(values, dtype)
         values.stop_gradient = stop_gradient
     if shape is None:
         shape = _infer_dense_shape(indices)
-    return core.eager.sparse_coo_tensor(indices, values, shape, stop_gradient)
+    return _C_ops.final_state_sparse_create_sparse_coo_tensor(values, indices,
+                                                              shape)
 #TODO: need to support shape is None
@@ -171,6 +193,9 @@ def sparse_csr_tensor(crows,
         #       cols=[1, 3, 2, 0, 1],
         #       values=[1, 2, 3, 4, 5])
     """
+    place = _get_place(place)
+
     if not isinstance(crows, core.eager.Tensor):
         crows = to_tensor(crows, dtype=None, place=place, stop_gradient=True)
     if not isinstance(cols, core.eager.Tensor):
@@ -182,10 +207,15 @@ def sparse_csr_tensor(crows,
             "SparseCsrTensor only support 2-D or 3-D matrix. The 'crows', 'cols' and 'values' must be 1-D."
         )
     if place is not None:
         if not crows.place._equals(place):
             crows = crows._copy_to(place, False)
         if not cols.place._equals(place):
             cols = cols._copy_to(place, False)
         if not values.place._equals(place):
             values = values._copy_to(place, False)
         values = _handle_dtype(values, dtype)
         values.stop_gradient = stop_gradient
     return core.eager.sparse_csr_tensor(crows, cols, values, shape,
                                         stop_gradient)
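With the new `_get_place` helper, `place=None` resolves to the framework's current expected place instead of being passed through untouched, and an explicit place is validated and applied to the index and value tensors. A small usage sketch under the eager-mode guard used by the unit tests of this commit (the guard import and `.values().place` access are assumptions about this Paddle version):

import paddle
from paddle.fluid.framework import _test_eager_guard  # guard used by the tests above

with _test_eager_guard():
    indices = [[0, 1, 2], [1, 2, 0]]
    values = [1.0, 2.0, 3.0]
    # place=None: the tensor is created on the current default device.
    x_default = paddle.sparse.sparse_coo_tensor(indices, values, shape=[3, 3])
    # Explicit place: indices and values are copied to the CPU.
    x_cpu = paddle.sparse.sparse_coo_tensor(
        indices, values, shape=[3, 3], place=paddle.CPUPlace())
    print(x_default.values().place, x_cpu.values().place)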
python/paddle/utils/code_gen/sparse_api.yaml
@@ -21,6 +21,14 @@
     layout : x
   backward : coo_values_grad

+- api : create_sparse_coo_tensor
+  args : (Tensor values, Tensor indices, IntArray dense_shape)
+  output : Tensor(out@SparseCooTensor)
+  kernel :
+    func : sparse_coo_tensor
+    layout : values
+  backward : create_sparse_coo_tensor_grad
+
 - api : csr_values
   args : (Tensor x)
   output : Tensor(out@DenseTensor)
python/paddle/utils/code_gen/sparse_bw_api.yaml
@@ -19,6 +19,13 @@
   kernel :
     func : coo_values_grad

+- backward_api : create_sparse_coo_tensor_grad
+  forward : create_sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out@SparseCooTensor)
+  args : (Tensor indices, Tensor out_grad)
+  output : Tensor(values_grad@DenseTensor)
+  kernel :
+    func : sparse_coo_tensor_grad
+
 - backward_api : dense_to_coo_grad
   forward : dense_to_coo(Tensor x, int64_t sparse_dim) -> Tensor(out@SparseCooTensor)
   args : (Tensor out_grad)