Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
016b94c2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
016b94c2
编写于
8月 22, 2022
作者:
Z
zhangkaihuo
提交者:
GitHub
8月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename the member function of SparseTensor (#45291)
上级
ed57237e
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
108 addition
and
79 deletion
+108
-79
paddle/phi/core/sparse_coo_tensor.h
paddle/phi/core/sparse_coo_tensor.h
+14
-0
paddle/phi/core/sparse_csr_tensor.cc
paddle/phi/core/sparse_csr_tensor.cc
+3
-3
paddle/phi/core/sparse_csr_tensor.h
paddle/phi/core/sparse_csr_tensor.h
+21
-0
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
+12
-13
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/convolution.cu.h
paddle/phi/kernels/sparse/gpu/convolution.cu.h
+6
-8
paddle/phi/kernels/sparse/gpu/full_kernel.cu
paddle/phi/kernels/sparse/gpu/full_kernel.cu
+4
-4
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
+13
-14
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
+4
-4
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
+8
-10
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
+4
-4
未找到文件。
paddle/phi/core/sparse_coo_tensor.h
浏览文件 @
016b94c2
...
...
@@ -63,10 +63,16 @@ class SparseCooTensor : public TensorBase,
/// \brief Returns the indices of non zero elements in original dense tensor.
/// \return The indices of non zero elements in original dense tensor.
const
DenseTensor
&
indices
()
const
{
return
non_zero_indices_
;
}
/// Note: This function will be removed soon. It is recommended to use indices()
const
DenseTensor
&
non_zero_indices
()
const
{
return
non_zero_indices_
;
}
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const
DenseTensor
&
values
()
const
{
return
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use values()
const
DenseTensor
&
non_zero_elements
()
const
{
return
non_zero_elements_
;
}
/// \brief Returns whether the indices has coalesced
...
...
@@ -136,10 +142,18 @@ class SparseCooTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_indices_.
/// return a mutable pointer of non_zero_indices_.
DenseTensor
*
mutable_indices
()
{
return
&
non_zero_indices_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_indices()
DenseTensor
*
mutable_non_zero_indices
()
{
return
&
non_zero_indices_
;
}
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor
*
mutable_values
()
{
return
&
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor
*
mutable_non_zero_elements
()
{
return
&
non_zero_elements_
;
}
/// \brief This function is not recommended
...
...
paddle/phi/core/sparse_csr_tensor.cc
浏览文件 @
016b94c2
...
...
@@ -72,9 +72,9 @@ SparseCsrTensor::SparseCsrTensor(const SparseCsrTensor& other)
SparseCsrTensor
&
SparseCsrTensor
::
operator
=
(
const
SparseCsrTensor
&
other
)
{
this
->
dims_
=
other
.
dims
();
this
->
non_zero_crows_
=
other
.
non_zero_
crows
();
this
->
non_zero_cols_
=
other
.
non_zero_
cols
();
this
->
non_zero_elements_
=
other
.
non_zero_element
s
();
this
->
non_zero_crows_
=
other
.
crows
();
this
->
non_zero_cols_
=
other
.
cols
();
this
->
non_zero_elements_
=
other
.
value
s
();
return
*
this
;
}
...
...
paddle/phi/core/sparse_csr_tensor.h
浏览文件 @
016b94c2
...
...
@@ -74,15 +74,24 @@ class SparseCsrTensor : public TensorBase,
/// dense tensor.
/// \return The compressed row index of non zero elements in original dense
/// tensor.
const
DenseTensor
&
crows
()
const
{
return
non_zero_crows_
;
}
/// Note: This function will be removed soon. It is recommended to use crows()
const
DenseTensor
&
non_zero_crows
()
const
{
return
non_zero_crows_
;
}
/// \brief Returns the column index of non zero elements in original dense
/// tensor.
/// \return The column index of non zero elements in original dense tensor.
const
DenseTensor
&
cols
()
const
{
return
non_zero_cols_
;
}
/// Note: This function will be removed soon. It is recommended to use cols()
const
DenseTensor
&
non_zero_cols
()
const
{
return
non_zero_cols_
;
}
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const
DenseTensor
&
values
()
const
{
return
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use values()
const
DenseTensor
&
non_zero_elements
()
const
{
return
non_zero_elements_
;
}
/// \brief Returns the total number of non zero elements in original dense
...
...
@@ -138,14 +147,26 @@ class SparseCsrTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_crows.
/// return a mutable pointer of non_zero_crows.
DenseTensor
*
mutable_crows
()
{
return
&
non_zero_crows_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_crows()
DenseTensor
*
mutable_non_zero_crows
()
{
return
&
non_zero_crows_
;
}
/// \brief Get a mutable pointer of non_zero_cols.
/// return a mutable pointer of non_zero_cols.
DenseTensor
*
mutable_cols
()
{
return
&
non_zero_cols_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_cols()
DenseTensor
*
mutable_non_zero_cols
()
{
return
&
non_zero_cols_
;
}
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor
*
mutable_values
()
{
return
&
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor
*
mutable_non_zero_elements
()
{
return
&
non_zero_elements_
;
}
/// \brief set the dims of original dense tensor
...
...
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -31,7 +31,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
const
SparseCooTensor
&
x
,
SparseCooTensor
*
out
)
{
const
DenseTensor
&
x_indices
=
x
.
non_zero_indices
();
const
DenseTensor
&
x_values
=
x
.
non_zero_element
s
();
const
DenseTensor
&
x_values
=
x
.
value
s
();
DenseTensor
out_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x_indices
);
DenseTensor
out_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x_values
);
...
...
@@ -73,7 +73,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
// 2. get the address of each non-zero value
const
T
*
x_values_ptr
=
x_values
.
data
<
T
>
();
const
int64_t
stride
=
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
non_zero_element
s
().
dims
()[
1
];
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
value
s
().
dims
()[
1
];
DenseTensor
values_indexs
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
DataType
::
INT32
,
{
nnz
},
DataLayout
::
NCHW
));
int
*
values_indexs_ptr
=
values_indexs
.
data
<
int
>
();
...
...
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -81,7 +81,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
auto
blas
=
phi
::
funcs
::
GetBlas
<
GPUContext
,
T
>
(
dev_ctx
);
DenseTensor
x_grad_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
T
*
x_grad_values_ptr
=
x_grad_values
.
data
<
T
>
();
phi
::
backends
::
gpu
::
GpuMemsetAsync
(
x_grad_values_ptr
,
0
,
...
...
@@ -109,16 +109,15 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
offsets
[
kernel_size
]
=
offset
;
if
(
subm
)
{
phi
::
funcs
::
sparse
::
SubmPreProcess
<
T
,
GPUContext
>
(
dev_ctx
,
x
,
kernel
,
out_grad
.
non_zero_elements
(),
in_channels
,
out_channels
,
half_kernel_size
,
kernel_grad
,
&
x_grad_values
);
phi
::
funcs
::
sparse
::
SubmPreProcess
<
T
,
GPUContext
>
(
dev_ctx
,
x
,
kernel
,
out_grad
.
values
(),
in_channels
,
out_channels
,
half_kernel_size
,
kernel_grad
,
&
x_grad_values
);
if
(
max_count
==
0
)
{
return
;
}
...
...
@@ -181,7 +180,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
unique_value_ptr
);
GatherV2
<
T
,
IntT
>
(
dev_ctx
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out_index_ptr
,
unique_value_ptr
,
x
.
nnz
(),
...
...
@@ -192,7 +191,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
in_features_ptr
);
Gather
<
T
,
IntT
>
(
dev_ctx
,
out_grad
.
non_zero_element
s
().
data
<
T
>
(),
out_grad
.
value
s
().
data
<
T
>
(),
rulebook_ptr
+
rulebook_len
,
rulebook_len
,
out_channels
,
...
...
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -131,7 +131,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
set_zero
(
dev_ctx
,
&
out_features
,
static_cast
<
T
>
(
0.0
f
));
Gather
<
T
,
IntT
>
(
dev_ctx
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
rulebook_ptr
,
rulebook_len
,
in_channels
,
...
...
@@ -139,7 +139,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
// 3. call gemm for every weight
auto
blas
=
phi
::
funcs
::
GetBlas
<
GPUContext
,
T
>
(
dev_ctx
);
auto
*
out_values
=
out
->
mutable_
non_zero_element
s
();
auto
*
out_values
=
out
->
mutable_
value
s
();
T
*
out_values_ptr
=
out_values
->
data
<
T
>
();
set_zero
(
dev_ctx
,
out_values
,
static_cast
<
T
>
(
0.0
f
));
...
...
paddle/phi/kernels/sparse/gpu/convolution.cu.h
浏览文件 @
016b94c2
...
...
@@ -515,9 +515,8 @@ int ProductRuleBook(const Context& dev_ctx,
const
int64_t
sparse_dim
=
4
;
DenseTensorMeta
indices_meta
(
indices_dtype
,
{
sparse_dim
,
out_non_zero_num
},
DataLayout
::
NCHW
);
DenseTensorMeta
values_meta
(
x
.
dtype
(),
{
out_non_zero_num
,
kernel_sizes
[
4
]},
x
.
non_zero_elements
().
layout
());
DenseTensorMeta
values_meta
(
x
.
dtype
(),
{
out_non_zero_num
,
kernel_sizes
[
4
]},
x
.
values
().
layout
());
phi
::
DenseTensor
out_indices
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
indices_meta
));
phi
::
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
values_meta
));
...
...
@@ -541,11 +540,10 @@ int ProductRuleBook(const Context& dev_ctx,
}
else
{
DenseTensor
out_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
x
.
dtype
(),
{
x
.
nnz
(),
kernel_sizes
[
4
]},
x
.
non_zero_elements
().
layout
()));
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
x
.
dtype
(),
{
x
.
nnz
(),
kernel_sizes
[
4
]},
x
.
values
().
layout
()));
phi
::
Copy
(
dev_ctx
,
x
.
non_zero_indices
(),
dev_ctx
.
GetPlace
(),
false
,
&
out_indices
);
out
->
SetMember
(
out_indices
,
out_values
,
out_dims
,
true
);
...
...
paddle/phi/kernels/sparse/gpu/full_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -48,8 +48,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
false
,
out
->
mutable_non_zero_indices
());
DenseTensor
*
values
=
out
->
mutable_
non_zero_element
s
();
values
->
Resize
(
x
.
non_zero_element
s
().
dims
());
DenseTensor
*
values
=
out
->
mutable_
value
s
();
values
->
Resize
(
x
.
value
s
().
dims
());
dev_ctx
.
template
Alloc
<
T
>(
values
);
std
::
vector
<
const
DenseTensor
*>
inputs
=
{};
...
...
@@ -80,8 +80,8 @@ void CsrFullLikeKernel(const Context& dev_ctx,
false
,
out
->
mutable_non_zero_cols
());
DenseTensor
*
values
=
out
->
mutable_
non_zero_element
s
();
values
->
Resize
(
x
.
non_zero_element
s
().
dims
());
DenseTensor
*
values
=
out
->
mutable_
value
s
();
values
->
Resize
(
x
.
value
s
().
dims
());
dev_ctx
.
template
Alloc
<
T
>(
values
);
std
::
vector
<
const
DenseTensor
*>
inputs
=
{};
...
...
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -94,9 +94,9 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
AttnSoftmaxGpuGradKernel
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
softmax
.
non_zero_crows
().
data
<
int64_t
>
(),
softmax
.
non_zero_element
s
().
data
<
T
>
(),
dsoftmax
.
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
d_sdd_result
.
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
softmax
.
value
s
().
data
<
T
>
(),
dsoftmax
.
mutable_
value
s
()
->
data
<
T
>
(),
d_sdd_result
.
mutable_
value
s
()
->
data
<
T
>
(),
M
,
total_row_num
,
std
::
sqrt
(
N
),
...
...
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -202,10 +202,10 @@ void FusedAttentionCsrKernel(
AttnSoftmaxGpuKernel
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
sdd_result
.
non_zero_crows
().
data
<
int64_t
>
(),
sdd_result
.
non_zero_cols
().
data
<
int64_t
>
(),
sdd_result
.
non_zero_element
s
().
data
<
T
>
(),
sdd_result
.
value
s
().
data
<
T
>
(),
kp_mask_ptr
?
kp_mask_ptr
->
data
<
T
>
()
:
nullptr
,
attn_mask_ptr
?
attn_mask_ptr
->
data
<
T
>
()
:
nullptr
,
softmax
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
softmax
->
mutable_
value
s
()
->
data
<
T
>
(),
M
,
total_row_num
,
q_dim
[
1
],
...
...
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -59,7 +59,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
mask
.
dims
(),
phi
::
errors
::
InvalidArgument
(
"the input x and mask must have the shape"
));
const
DenseTensor
&
indices
=
mask
.
non_zero_indices
();
const
DenseTensor
&
values
=
mask
.
non_zero_element
s
();
const
DenseTensor
&
values
=
mask
.
value
s
();
const
int
sparse_dim
=
mask
.
sparse_dim
();
DenseTensor
sparse_offsets
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
...
...
@@ -224,8 +224,8 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
phi
::
backends
::
gpu
::
GpuMemsetAsync
(
table
.
data
<
int
>
(),
0
,
table_size
*
sizeof
(
int
),
dev_ctx
.
stream
());
const
int64_t
stride
=
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
non_zero_element
s
().
dims
()[
1
];
*
out
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
value
s
().
dims
()[
1
];
*
out
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
phi
::
funcs
::
SetConstant
<
GPUContext
,
T
>
set_zero
;
set_zero
(
dev_ctx
,
out
,
static_cast
<
T
>
(
0
));
T
*
out_ptr
=
out
->
data
<
T
>
();
...
...
@@ -242,16 +242,15 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
const
int
VecBytes
=
16
;
const
int
VecSize
=
VecBytes
/
sizeof
(
T
);
if
(
stride
%
VecSize
==
0
)
{
MaskCopy
<
T
,
IntT
,
VecSize
>
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
mask_indexs_ptr
,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
non_zero_elements
().
data
<
T
>
(),
out_ptr
);
MaskCopy
<
T
,
IntT
,
VecSize
><<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
mask_indexs_ptr
,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
values
().
data
<
T
>
(),
out_ptr
);
}
else
{
MaskCopy
<
T
,
IntT
,
1
><<<
config
.
block_per_grid
,
config
.
thread_per_block
,
...
...
@@ -260,7 +259,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out_ptr
);
}
}
...
...
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -79,7 +79,7 @@ void MvCooGradKernel(const Context &dev_ctx,
dev_ctx
.
stream
()
>>>
(
dout
.
data
<
T
>
(),
vec
.
data
<
T
>
(),
dx
->
non_zero_indices
().
data
<
data_t
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
dx
->
nnz
());
}));
}
...
...
@@ -127,7 +127,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
vec
.
data
<
T
>
(),
dx
->
non_zero_crows
().
data
<
data_t
>
(),
dx
->
non_zero_cols
().
data
<
data_t
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
);
}));
}
...
...
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -68,13 +68,13 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
const
int
*
counter_ptr
=
counter
.
data
<
int
>
();
phi
::
funcs
::
sparse
::
PrefixSum
(
counter_ptr
,
&
offsets
[
0
],
kernel_size
);
const
T
*
in_features_ptr
=
x
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
out_features_ptr
=
out
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
out_grad_ptr
=
out_grad
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
value
s
().
data
<
T
>
();
const
T
*
out_features_ptr
=
out
.
value
s
().
data
<
T
>
();
const
T
*
out_grad_ptr
=
out_grad
.
value
s
().
data
<
T
>
();
// TODO(zhangkaihuo): call phi::sparse::EmptyLike
DenseTensor
x_grad_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
x_grad
->
SetMember
(
x_grad_indices
,
x_grad_values
,
x
.
dims
(),
true
);
T
*
x_grad_ptr
=
x_grad_values
.
data
<
T
>
();
phi
::
funcs
::
SetConstant
<
GPUContext
,
T
>
set_zero
;
...
...
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -95,8 +95,8 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
const
IntT
*
rulebook_ptr
=
rulebook
->
data
<
IntT
>
();
T
*
out_features_ptr
=
out
->
mutable_
non_zero_element
s
()
->
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
non_zero_element
s
().
data
<
T
>
();
T
*
out_features_ptr
=
out
->
mutable_
value
s
()
->
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
value
s
().
data
<
T
>
();
counter
->
Resize
({
kernel_size
});
int
*
counter_ptr
=
dev_ctx
.
template
HostAlloc
<
int
>(
counter
);
memcpy
(
counter_ptr
,
h_counter
.
data
(),
h_counter
.
size
()
*
sizeof
(
int
));
...
...
@@ -107,7 +107,7 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
thrust
::
fill
(
thrust
::
cuda
::
par
.
on
(
dev_ctx
.
stream
()),
#endif
out_features_ptr
,
out_features_ptr
+
out
->
non_zero_element
s
().
numel
(),
out_features_ptr
+
out
->
value
s
().
numel
(),
static_cast
<
T
>
(
0
));
// TODO(zhangkaihuo) Replacing multiple calls with one kernel may be faster
for
(
int
i
=
0
;
i
<
kernel_size
;
i
++
)
{
...
...
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -96,9 +96,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
out
.
non_zero_crows
().
dtype
(),
"SoftmaxCsrGradKernel"
,
([
&
]
{
SoftmaxGradGpuKernel
<
T
,
data_t
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
out
.
non_zero_crows
().
data
<
data_t
>
(),
out
.
non_zero_element
s
().
data
<
T
>
(),
dout
.
non_zero_element
s
().
data
<
T
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
out
.
value
s
().
data
<
T
>
(),
dout
.
value
s
().
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
,
total_row_number
);
}));
...
...
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -109,8 +109,8 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
x
.
non_zero_crows
().
dtype
(),
"CsrSoftmaxKernel"
,
([
&
]
{
SoftmaxGpuKernel
<
T
,
data_t
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
x
.
non_zero_crows
().
data
<
data_t
>
(),
x
.
non_zero_element
s
().
data
<
T
>
(),
out
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
,
total_row_number
);
}));
...
...
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -215,7 +215,7 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
const
int64_t
non_zero_num
=
x
.
non_zero_cols
().
numel
();
const
auto
&
csr_crows
=
x
.
non_zero_crows
();
const
auto
&
csr_cols
=
x
.
non_zero_cols
();
const
auto
&
csr_values
=
x
.
non_zero_element
s
();
const
auto
&
csr_values
=
x
.
value
s
();
const
IntT
*
csr_crows_data
=
csr_crows
.
data
<
IntT
>
();
const
IntT
*
csr_cols_data
=
csr_cols
.
data
<
IntT
>
();
const
T
*
csr_values_data
=
csr_values
.
data
<
T
>
();
...
...
@@ -361,14 +361,13 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
phi
::
DenseTensor
non_zero_crows
=
phi
::
Empty
<
IntT
>
(
dev_ctx
,
{
batchs
*
(
rows
+
1
)});
phi
::
DenseTensor
non_zero_cols
=
phi
::
Empty
<
IntT
>
(
dev_ctx
,
{
non_zero_num
});
phi
::
DenseTensor
non_zero_elements
=
phi
::
EmptyLike
<
T
,
GPUContext
>
(
dev_ctx
,
x
.
non_zero_elements
());
phi
::
DenseTensor
values
=
phi
::
EmptyLike
<
T
,
GPUContext
>
(
dev_ctx
,
x
.
values
());
IntT
*
csr_crows_data
=
non_zero_crows
.
data
<
IntT
>
();
IntT
*
csr_cols_data
=
non_zero_cols
.
data
<
IntT
>
();
T
*
csr_values_data
=
non_zero_element
s
.
data
<
T
>
();
T
*
csr_values_data
=
value
s
.
data
<
T
>
();
const
auto
&
coo_indices
=
x
.
non_zero_indices
();
const
auto
&
coo_values
=
x
.
non_zero_element
s
();
const
auto
&
coo_values
=
x
.
value
s
();
const
IntT
*
batchs_ptr
=
coo_indices
.
data
<
IntT
>
();
const
IntT
*
coo_rows_data
=
x_dims
.
size
()
==
2
?
batchs_ptr
:
batchs_ptr
+
non_zero_num
;
...
...
@@ -414,7 +413,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
sizeof
(
T
)
*
non_zero_num
,
gpuMemcpyDeviceToDevice
,
dev_ctx
.
stream
());
out
->
SetMember
(
non_zero_crows
,
non_zero_cols
,
non_zero_element
s
,
x_dims
);
out
->
SetMember
(
non_zero_crows
,
non_zero_cols
,
value
s
,
x_dims
);
}
template
<
typename
T
,
typename
Context
>
...
...
@@ -455,7 +454,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const
auto
non_zero_num
=
x
.
nnz
();
const
auto
dense_dims
=
x
.
dims
();
const
auto
indices
=
x
.
non_zero_indices
();
const
auto
values
=
x
.
non_zero_element
s
();
const
auto
values
=
x
.
value
s
();
const
auto
indices_dims
=
indices
.
dims
();
int64_t
sparse_dim
=
indices_dims
[
0
];
if
(
indices_dims
.
size
()
==
1
)
{
...
...
@@ -465,9 +464,8 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const
auto
place
=
dev_ctx
.
GetPlace
();
const
T
*
x_data
=
values
.
data
<
T
>
();
*
out
=
phi
::
Empty
(
dev_ctx
,
phi
::
DenseTensorMeta
(
x
.
dtype
(),
x
.
dims
(),
x
.
non_zero_elements
().
layout
()));
*
out
=
phi
::
Empty
(
dev_ctx
,
phi
::
DenseTensorMeta
(
x
.
dtype
(),
x
.
dims
(),
x
.
values
().
layout
()));
T
*
out_data
=
out
->
data
<
T
>
();
int64_t
base_offset
=
1
;
for
(
int64_t
i
=
0
;
i
<
dense_dim
;
i
++
)
{
...
...
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -40,8 +40,8 @@ void DivCooScalarKernel(const Context& dev_ctx,
SparseCooTensor
*
out
)
{
EmptyLikeCooKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
out
);
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
non_zero_element
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
non_zero_element
s
()};
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
value
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
value
s
()};
DivScalarFunctor
<
T
>
func
(
static_cast
<
T
>
(
scalar
));
funcs
::
ElementwiseKernel
<
T
,
DivScalarFunctor
<
T
>>
(
dev_ctx
,
ins
,
&
outs
,
func
);
}
...
...
@@ -53,8 +53,8 @@ void DivCsrScalarKernel(const Context& dev_ctx,
SparseCsrTensor
*
out
)
{
EmptyLikeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
out
);
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
non_zero_element
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
non_zero_element
s
()};
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
value
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
value
s
()};
DivScalarFunctor
<
T
>
func
(
static_cast
<
T
>
(
scalar
));
funcs
::
ElementwiseKernel
<
T
,
DivScalarFunctor
<
T
>>
(
dev_ctx
,
ins
,
&
outs
,
func
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录