Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
016b94c2
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
016b94c2
编写于
8月 22, 2022
作者:
Z
zhangkaihuo
提交者:
GitHub
8月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename the member function of SparseTensor (#45291)
上级
ed57237e
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
108 addition
and
79 deletion
+108
-79
paddle/phi/core/sparse_coo_tensor.h
paddle/phi/core/sparse_coo_tensor.h
+14
-0
paddle/phi/core/sparse_csr_tensor.cc
paddle/phi/core/sparse_csr_tensor.cc
+3
-3
paddle/phi/core/sparse_csr_tensor.h
paddle/phi/core/sparse_csr_tensor.h
+21
-0
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
+12
-13
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/convolution.cu.h
paddle/phi/kernels/sparse/gpu/convolution.cu.h
+6
-8
paddle/phi/kernels/sparse/gpu/full_kernel.cu
paddle/phi/kernels/sparse/gpu/full_kernel.cu
+4
-4
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
+13
-14
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
+4
-4
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
+3
-3
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
+2
-2
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
+8
-10
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
+4
-4
未找到文件。
paddle/phi/core/sparse_coo_tensor.h
浏览文件 @
016b94c2
...
...
@@ -63,10 +63,16 @@ class SparseCooTensor : public TensorBase,
/// \brief Returns the indices of non zero elements in original dense tensor.
/// \return The indices of non zero elements in original dense tensor.
const
DenseTensor
&
indices
()
const
{
return
non_zero_indices_
;
}
/// Note: This function will be removed soon; use indices() instead.
const
DenseTensor
&
non_zero_indices
()
const
{
return
non_zero_indices_
;
}
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const
DenseTensor
&
values
()
const
{
return
non_zero_elements_
;
}
/// Note: This function will be removed soon; use values() instead.
const
DenseTensor
&
non_zero_elements
()
const
{
return
non_zero_elements_
;
}
/// \brief Returns whether the indices have been coalesced
...
...
@@ -136,10 +142,18 @@ class SparseCooTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_indices_.
/// return a mutable pointer of non_zero_indices_.
DenseTensor
*
mutable_indices
()
{
return
&
non_zero_indices_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_indices()
DenseTensor
*
mutable_non_zero_indices
()
{
return
&
non_zero_indices_
;
}
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor
*
mutable_values
()
{
return
&
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor
*
mutable_non_zero_elements
()
{
return
&
non_zero_elements_
;
}
/// \brief This function is not recommended
...
...
paddle/phi/core/sparse_csr_tensor.cc
浏览文件 @
016b94c2
...
...
@@ -72,9 +72,9 @@ SparseCsrTensor::SparseCsrTensor(const SparseCsrTensor& other)
SparseCsrTensor
&
SparseCsrTensor
::
operator
=
(
const
SparseCsrTensor
&
other
)
{
this
->
dims_
=
other
.
dims
();
this
->
non_zero_crows_
=
other
.
non_zero_
crows
();
this
->
non_zero_cols_
=
other
.
non_zero_
cols
();
this
->
non_zero_elements_
=
other
.
non_zero_element
s
();
this
->
non_zero_crows_
=
other
.
crows
();
this
->
non_zero_cols_
=
other
.
cols
();
this
->
non_zero_elements_
=
other
.
value
s
();
return
*
this
;
}
...
...
paddle/phi/core/sparse_csr_tensor.h
浏览文件 @
016b94c2
...
...
@@ -74,15 +74,24 @@ class SparseCsrTensor : public TensorBase,
/// dense tensor.
/// \return The compressed row index of non zero elements in original dense
/// tensor.
const
DenseTensor
&
crows
()
const
{
return
non_zero_crows_
;
}
/// Note: This function will be removed soon; use crows() instead.
const
DenseTensor
&
non_zero_crows
()
const
{
return
non_zero_crows_
;
}
/// \brief Returns the column index of non zero elements in original dense
/// tensor.
/// \return The column index of non zero elements in original dense tensor.
const
DenseTensor
&
cols
()
const
{
return
non_zero_cols_
;
}
/// Note: This function will be removed soon; use cols() instead.
const
DenseTensor
&
non_zero_cols
()
const
{
return
non_zero_cols_
;
}
/// \brief Returns the non zero elements in original dense tensor.
/// \return The non zero elements in original dense tensor.
const
DenseTensor
&
values
()
const
{
return
non_zero_elements_
;
}
/// Note: This function will be removed soon; use values() instead.
const
DenseTensor
&
non_zero_elements
()
const
{
return
non_zero_elements_
;
}
/// \brief Returns the total number of non zero elements in original dense
...
...
@@ -138,14 +147,26 @@ class SparseCsrTensor : public TensorBase,
/// \brief Get a mutable pointer of non_zero_crows.
/// return a mutable pointer of non_zero_crows.
DenseTensor
*
mutable_crows
()
{
return
&
non_zero_crows_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_crows()
DenseTensor
*
mutable_non_zero_crows
()
{
return
&
non_zero_crows_
;
}
/// \brief Get a mutable pointer of non_zero_cols.
/// return a mutable pointer of non_zero_cols.
DenseTensor
*
mutable_cols
()
{
return
&
non_zero_cols_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_cols()
DenseTensor
*
mutable_non_zero_cols
()
{
return
&
non_zero_cols_
;
}
/// \brief Get a mutable pointer of non_zero_elements.
/// return a mutable pointer of non_zero_elements.
DenseTensor
*
mutable_values
()
{
return
&
non_zero_elements_
;
}
/// Note: This function will be removed soon. It is recommended to use
/// mutable_values()
DenseTensor
*
mutable_non_zero_elements
()
{
return
&
non_zero_elements_
;
}
/// \brief set the dims of original dense tensor
...
...
paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -31,7 +31,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
const
SparseCooTensor
&
x
,
SparseCooTensor
*
out
)
{
const
DenseTensor
&
x_indices
=
x
.
non_zero_indices
();
const
DenseTensor
&
x_values
=
x
.
non_zero_element
s
();
const
DenseTensor
&
x_values
=
x
.
value
s
();
DenseTensor
out_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x_indices
);
DenseTensor
out_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x_values
);
...
...
@@ -73,7 +73,7 @@ void CoalesceGPUKernel(const GPUContext& dev_ctx,
// 2. get the address of each non-zero value
const
T
*
x_values_ptr
=
x_values
.
data
<
T
>
();
const
int64_t
stride
=
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
non_zero_element
s
().
dims
()[
1
];
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
value
s
().
dims
()[
1
];
DenseTensor
values_indexs
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
DataType
::
INT32
,
{
nnz
},
DataLayout
::
NCHW
));
int
*
values_indexs_ptr
=
values_indexs
.
data
<
int
>
();
...
...
paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -81,7 +81,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
auto
blas
=
phi
::
funcs
::
GetBlas
<
GPUContext
,
T
>
(
dev_ctx
);
DenseTensor
x_grad_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
T
*
x_grad_values_ptr
=
x_grad_values
.
data
<
T
>
();
phi
::
backends
::
gpu
::
GpuMemsetAsync
(
x_grad_values_ptr
,
0
,
...
...
@@ -109,16 +109,15 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
offsets
[
kernel_size
]
=
offset
;
if
(
subm
)
{
phi
::
funcs
::
sparse
::
SubmPreProcess
<
T
,
GPUContext
>
(
dev_ctx
,
x
,
kernel
,
out_grad
.
non_zero_elements
(),
in_channels
,
out_channels
,
half_kernel_size
,
kernel_grad
,
&
x_grad_values
);
phi
::
funcs
::
sparse
::
SubmPreProcess
<
T
,
GPUContext
>
(
dev_ctx
,
x
,
kernel
,
out_grad
.
values
(),
in_channels
,
out_channels
,
half_kernel_size
,
kernel_grad
,
&
x_grad_values
);
if
(
max_count
==
0
)
{
return
;
}
...
...
@@ -181,7 +180,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
unique_value_ptr
);
GatherV2
<
T
,
IntT
>
(
dev_ctx
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out_index_ptr
,
unique_value_ptr
,
x
.
nnz
(),
...
...
@@ -192,7 +191,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
in_features_ptr
);
Gather
<
T
,
IntT
>
(
dev_ctx
,
out_grad
.
non_zero_element
s
().
data
<
T
>
(),
out_grad
.
value
s
().
data
<
T
>
(),
rulebook_ptr
+
rulebook_len
,
rulebook_len
,
out_channels
,
...
...
paddle/phi/kernels/sparse/gpu/conv_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -131,7 +131,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
set_zero
(
dev_ctx
,
&
out_features
,
static_cast
<
T
>
(
0.0
f
));
Gather
<
T
,
IntT
>
(
dev_ctx
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
rulebook_ptr
,
rulebook_len
,
in_channels
,
...
...
@@ -139,7 +139,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
// 3. call gemm for every weight
auto
blas
=
phi
::
funcs
::
GetBlas
<
GPUContext
,
T
>
(
dev_ctx
);
auto
*
out_values
=
out
->
mutable_
non_zero_element
s
();
auto
*
out_values
=
out
->
mutable_
value
s
();
T
*
out_values_ptr
=
out_values
->
data
<
T
>
();
set_zero
(
dev_ctx
,
out_values
,
static_cast
<
T
>
(
0.0
f
));
...
...
paddle/phi/kernels/sparse/gpu/convolution.cu.h
浏览文件 @
016b94c2
...
...
@@ -515,9 +515,8 @@ int ProductRuleBook(const Context& dev_ctx,
const
int64_t
sparse_dim
=
4
;
DenseTensorMeta
indices_meta
(
indices_dtype
,
{
sparse_dim
,
out_non_zero_num
},
DataLayout
::
NCHW
);
DenseTensorMeta
values_meta
(
x
.
dtype
(),
{
out_non_zero_num
,
kernel_sizes
[
4
]},
x
.
non_zero_elements
().
layout
());
DenseTensorMeta
values_meta
(
x
.
dtype
(),
{
out_non_zero_num
,
kernel_sizes
[
4
]},
x
.
values
().
layout
());
phi
::
DenseTensor
out_indices
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
indices_meta
));
phi
::
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
std
::
move
(
values_meta
));
...
...
@@ -541,11 +540,10 @@ int ProductRuleBook(const Context& dev_ctx,
}
else
{
DenseTensor
out_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
x
.
dtype
(),
{
x
.
nnz
(),
kernel_sizes
[
4
]},
x
.
non_zero_elements
().
layout
()));
DenseTensor
out_values
=
phi
::
Empty
(
dev_ctx
,
DenseTensorMeta
(
x
.
dtype
(),
{
x
.
nnz
(),
kernel_sizes
[
4
]},
x
.
values
().
layout
()));
phi
::
Copy
(
dev_ctx
,
x
.
non_zero_indices
(),
dev_ctx
.
GetPlace
(),
false
,
&
out_indices
);
out
->
SetMember
(
out_indices
,
out_values
,
out_dims
,
true
);
...
...
paddle/phi/kernels/sparse/gpu/full_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -48,8 +48,8 @@ void CooFullLikeKernel(const Context& dev_ctx,
false
,
out
->
mutable_non_zero_indices
());
DenseTensor
*
values
=
out
->
mutable_
non_zero_element
s
();
values
->
Resize
(
x
.
non_zero_element
s
().
dims
());
DenseTensor
*
values
=
out
->
mutable_
value
s
();
values
->
Resize
(
x
.
value
s
().
dims
());
dev_ctx
.
template
Alloc
<
T
>(
values
);
std
::
vector
<
const
DenseTensor
*>
inputs
=
{};
...
...
@@ -80,8 +80,8 @@ void CsrFullLikeKernel(const Context& dev_ctx,
false
,
out
->
mutable_non_zero_cols
());
DenseTensor
*
values
=
out
->
mutable_
non_zero_element
s
();
values
->
Resize
(
x
.
non_zero_element
s
().
dims
());
DenseTensor
*
values
=
out
->
mutable_
value
s
();
values
->
Resize
(
x
.
value
s
().
dims
());
dev_ctx
.
template
Alloc
<
T
>(
values
);
std
::
vector
<
const
DenseTensor
*>
inputs
=
{};
...
...
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -94,9 +94,9 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
AttnSoftmaxGpuGradKernel
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
softmax
.
non_zero_crows
().
data
<
int64_t
>
(),
softmax
.
non_zero_element
s
().
data
<
T
>
(),
dsoftmax
.
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
d_sdd_result
.
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
softmax
.
value
s
().
data
<
T
>
(),
dsoftmax
.
mutable_
value
s
()
->
data
<
T
>
(),
d_sdd_result
.
mutable_
value
s
()
->
data
<
T
>
(),
M
,
total_row_num
,
std
::
sqrt
(
N
),
...
...
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -202,10 +202,10 @@ void FusedAttentionCsrKernel(
AttnSoftmaxGpuKernel
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
sdd_result
.
non_zero_crows
().
data
<
int64_t
>
(),
sdd_result
.
non_zero_cols
().
data
<
int64_t
>
(),
sdd_result
.
non_zero_element
s
().
data
<
T
>
(),
sdd_result
.
value
s
().
data
<
T
>
(),
kp_mask_ptr
?
kp_mask_ptr
->
data
<
T
>
()
:
nullptr
,
attn_mask_ptr
?
attn_mask_ptr
->
data
<
T
>
()
:
nullptr
,
softmax
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
softmax
->
mutable_
value
s
()
->
data
<
T
>
(),
M
,
total_row_num
,
q_dim
[
1
],
...
...
paddle/phi/kernels/sparse/gpu/mask_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -59,7 +59,7 @@ void SparseMaskGPUKernel(const GPUContext& dev_ctx,
mask
.
dims
(),
phi
::
errors
::
InvalidArgument
(
"the input x and mask must have the shape"
));
const
DenseTensor
&
indices
=
mask
.
non_zero_indices
();
const
DenseTensor
&
values
=
mask
.
non_zero_element
s
();
const
DenseTensor
&
values
=
mask
.
value
s
();
const
int
sparse_dim
=
mask
.
sparse_dim
();
DenseTensor
sparse_offsets
=
phi
::
Empty
<
GPUContext
>
(
dev_ctx
,
...
...
@@ -224,8 +224,8 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
phi
::
backends
::
gpu
::
GpuMemsetAsync
(
table
.
data
<
int
>
(),
0
,
table_size
*
sizeof
(
int
),
dev_ctx
.
stream
());
const
int64_t
stride
=
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
non_zero_element
s
().
dims
()[
1
];
*
out
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
x
.
dims
().
size
()
==
sparse_dim
?
1
:
x
.
value
s
().
dims
()[
1
];
*
out
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
phi
::
funcs
::
SetConstant
<
GPUContext
,
T
>
set_zero
;
set_zero
(
dev_ctx
,
out
,
static_cast
<
T
>
(
0
));
T
*
out_ptr
=
out
->
data
<
T
>
();
...
...
@@ -242,16 +242,15 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
const
int
VecBytes
=
16
;
const
int
VecSize
=
VecBytes
/
sizeof
(
T
);
if
(
stride
%
VecSize
==
0
)
{
MaskCopy
<
T
,
IntT
,
VecSize
>
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
mask_indexs_ptr
,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
non_zero_elements
().
data
<
T
>
(),
out_ptr
);
MaskCopy
<
T
,
IntT
,
VecSize
><<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
mask_indexs_ptr
,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
values
().
data
<
T
>
(),
out_ptr
);
}
else
{
MaskCopy
<
T
,
IntT
,
1
><<<
config
.
block_per_grid
,
config
.
thread_per_block
,
...
...
@@ -260,7 +259,7 @@ void SparseMaskHelperGPUKernel(const GPUContext& dev_ctx,
table
.
data
<
int
>
(),
mask_indexs
.
numel
(),
stride
,
x
.
non_zero_element
s
().
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out_ptr
);
}
}
...
...
paddle/phi/kernels/sparse/gpu/mv_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -79,7 +79,7 @@ void MvCooGradKernel(const Context &dev_ctx,
dev_ctx
.
stream
()
>>>
(
dout
.
data
<
T
>
(),
vec
.
data
<
T
>
(),
dx
->
non_zero_indices
().
data
<
data_t
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
dx
->
nnz
());
}));
}
...
...
@@ -127,7 +127,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
vec
.
data
<
T
>
(),
dx
->
non_zero_crows
().
data
<
data_t
>
(),
dx
->
non_zero_cols
().
data
<
data_t
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
);
}));
}
...
...
paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -68,13 +68,13 @@ void MaxPoolCooGradGPUKernel(const GPUContext& dev_ctx,
const
int
*
counter_ptr
=
counter
.
data
<
int
>
();
phi
::
funcs
::
sparse
::
PrefixSum
(
counter_ptr
,
&
offsets
[
0
],
kernel_size
);
const
T
*
in_features_ptr
=
x
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
out_features_ptr
=
out
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
out_grad_ptr
=
out_grad
.
non_zero_element
s
().
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
value
s
().
data
<
T
>
();
const
T
*
out_features_ptr
=
out
.
value
s
().
data
<
T
>
();
const
T
*
out_grad_ptr
=
out_grad
.
value
s
().
data
<
T
>
();
// TODO(zhangkaihuo): call phi::sparse::EmptyLike
DenseTensor
x_grad_indices
=
phi
::
EmptyLike
<
IntT
>
(
dev_ctx
,
x
.
non_zero_indices
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
non_zero_element
s
());
DenseTensor
x_grad_values
=
phi
::
EmptyLike
<
T
>
(
dev_ctx
,
x
.
value
s
());
x_grad
->
SetMember
(
x_grad_indices
,
x_grad_values
,
x
.
dims
(),
true
);
T
*
x_grad_ptr
=
x_grad_values
.
data
<
T
>
();
phi
::
funcs
::
SetConstant
<
GPUContext
,
T
>
set_zero
;
...
...
paddle/phi/kernels/sparse/gpu/pool_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -95,8 +95,8 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
const
IntT
*
rulebook_ptr
=
rulebook
->
data
<
IntT
>
();
T
*
out_features_ptr
=
out
->
mutable_
non_zero_element
s
()
->
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
non_zero_element
s
().
data
<
T
>
();
T
*
out_features_ptr
=
out
->
mutable_
value
s
()
->
data
<
T
>
();
const
T
*
in_features_ptr
=
x
.
value
s
().
data
<
T
>
();
counter
->
Resize
({
kernel_size
});
int
*
counter_ptr
=
dev_ctx
.
template
HostAlloc
<
int
>(
counter
);
memcpy
(
counter_ptr
,
h_counter
.
data
(),
h_counter
.
size
()
*
sizeof
(
int
));
...
...
@@ -107,7 +107,7 @@ void MaxPoolCooGPUKernel(const GPUContext& dev_ctx,
thrust
::
fill
(
thrust
::
cuda
::
par
.
on
(
dev_ctx
.
stream
()),
#endif
out_features_ptr
,
out_features_ptr
+
out
->
non_zero_element
s
().
numel
(),
out_features_ptr
+
out
->
value
s
().
numel
(),
static_cast
<
T
>
(
0
));
// TODO(zhangkaihuo) Replacing multiple calls with one kernel may be faster
for
(
int
i
=
0
;
i
<
kernel_size
;
i
++
)
{
...
...
paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -96,9 +96,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
out
.
non_zero_crows
().
dtype
(),
"SoftmaxCsrGradKernel"
,
([
&
]
{
SoftmaxGradGpuKernel
<
T
,
data_t
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
out
.
non_zero_crows
().
data
<
data_t
>
(),
out
.
non_zero_element
s
().
data
<
T
>
(),
dout
.
non_zero_element
s
().
data
<
T
>
(),
dx
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
out
.
value
s
().
data
<
T
>
(),
dout
.
value
s
().
data
<
T
>
(),
dx
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
,
total_row_number
);
}));
...
...
paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -109,8 +109,8 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
x
.
non_zero_crows
().
dtype
(),
"CsrSoftmaxKernel"
,
([
&
]
{
SoftmaxGpuKernel
<
T
,
data_t
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
x
.
non_zero_crows
().
data
<
data_t
>
(),
x
.
non_zero_element
s
().
data
<
T
>
(),
out
->
mutable_
non_zero_element
s
()
->
data
<
T
>
(),
x
.
value
s
().
data
<
T
>
(),
out
->
mutable_
value
s
()
->
data
<
T
>
(),
row_number
,
total_row_number
);
}));
...
...
paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -215,7 +215,7 @@ void SparseCsrToCooGPUKernel(const GPUContext& dev_ctx,
const
int64_t
non_zero_num
=
x
.
non_zero_cols
().
numel
();
const
auto
&
csr_crows
=
x
.
non_zero_crows
();
const
auto
&
csr_cols
=
x
.
non_zero_cols
();
const
auto
&
csr_values
=
x
.
non_zero_element
s
();
const
auto
&
csr_values
=
x
.
value
s
();
const
IntT
*
csr_crows_data
=
csr_crows
.
data
<
IntT
>
();
const
IntT
*
csr_cols_data
=
csr_cols
.
data
<
IntT
>
();
const
T
*
csr_values_data
=
csr_values
.
data
<
T
>
();
...
...
@@ -361,14 +361,13 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
phi
::
DenseTensor
non_zero_crows
=
phi
::
Empty
<
IntT
>
(
dev_ctx
,
{
batchs
*
(
rows
+
1
)});
phi
::
DenseTensor
non_zero_cols
=
phi
::
Empty
<
IntT
>
(
dev_ctx
,
{
non_zero_num
});
phi
::
DenseTensor
non_zero_elements
=
phi
::
EmptyLike
<
T
,
GPUContext
>
(
dev_ctx
,
x
.
non_zero_elements
());
phi
::
DenseTensor
values
=
phi
::
EmptyLike
<
T
,
GPUContext
>
(
dev_ctx
,
x
.
values
());
IntT
*
csr_crows_data
=
non_zero_crows
.
data
<
IntT
>
();
IntT
*
csr_cols_data
=
non_zero_cols
.
data
<
IntT
>
();
T
*
csr_values_data
=
non_zero_element
s
.
data
<
T
>
();
T
*
csr_values_data
=
value
s
.
data
<
T
>
();
const
auto
&
coo_indices
=
x
.
non_zero_indices
();
const
auto
&
coo_values
=
x
.
non_zero_element
s
();
const
auto
&
coo_values
=
x
.
value
s
();
const
IntT
*
batchs_ptr
=
coo_indices
.
data
<
IntT
>
();
const
IntT
*
coo_rows_data
=
x_dims
.
size
()
==
2
?
batchs_ptr
:
batchs_ptr
+
non_zero_num
;
...
...
@@ -414,7 +413,7 @@ void SparseCooToCsrGPUKernel(const GPUContext& dev_ctx,
sizeof
(
T
)
*
non_zero_num
,
gpuMemcpyDeviceToDevice
,
dev_ctx
.
stream
());
out
->
SetMember
(
non_zero_crows
,
non_zero_cols
,
non_zero_element
s
,
x_dims
);
out
->
SetMember
(
non_zero_crows
,
non_zero_cols
,
value
s
,
x_dims
);
}
template
<
typename
T
,
typename
Context
>
...
...
@@ -455,7 +454,7 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const
auto
non_zero_num
=
x
.
nnz
();
const
auto
dense_dims
=
x
.
dims
();
const
auto
indices
=
x
.
non_zero_indices
();
const
auto
values
=
x
.
non_zero_element
s
();
const
auto
values
=
x
.
value
s
();
const
auto
indices_dims
=
indices
.
dims
();
int64_t
sparse_dim
=
indices_dims
[
0
];
if
(
indices_dims
.
size
()
==
1
)
{
...
...
@@ -465,9 +464,8 @@ void SparseCooToDenseGPUKernel(const GPUContext& dev_ctx,
const
auto
place
=
dev_ctx
.
GetPlace
();
const
T
*
x_data
=
values
.
data
<
T
>
();
*
out
=
phi
::
Empty
(
dev_ctx
,
phi
::
DenseTensorMeta
(
x
.
dtype
(),
x
.
dims
(),
x
.
non_zero_elements
().
layout
()));
*
out
=
phi
::
Empty
(
dev_ctx
,
phi
::
DenseTensorMeta
(
x
.
dtype
(),
x
.
dims
(),
x
.
values
().
layout
()));
T
*
out_data
=
out
->
data
<
T
>
();
int64_t
base_offset
=
1
;
for
(
int64_t
i
=
0
;
i
<
dense_dim
;
i
++
)
{
...
...
paddle/phi/kernels/sparse/gpu/unary_kernel.cu
浏览文件 @
016b94c2
...
...
@@ -40,8 +40,8 @@ void DivCooScalarKernel(const Context& dev_ctx,
SparseCooTensor
*
out
)
{
EmptyLikeCooKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
out
);
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
non_zero_element
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
non_zero_element
s
()};
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
value
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
value
s
()};
DivScalarFunctor
<
T
>
func
(
static_cast
<
T
>
(
scalar
));
funcs
::
ElementwiseKernel
<
T
,
DivScalarFunctor
<
T
>>
(
dev_ctx
,
ins
,
&
outs
,
func
);
}
...
...
@@ -53,8 +53,8 @@ void DivCsrScalarKernel(const Context& dev_ctx,
SparseCsrTensor
*
out
)
{
EmptyLikeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
out
);
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
non_zero_element
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
non_zero_element
s
()};
std
::
vector
<
const
DenseTensor
*>
ins
=
{
&
(
x
.
value
s
())};
std
::
vector
<
DenseTensor
*>
outs
=
{
out
->
mutable_
value
s
()};
DivScalarFunctor
<
T
>
func
(
static_cast
<
T
>
(
scalar
));
funcs
::
ElementwiseKernel
<
T
,
DivScalarFunctor
<
T
>>
(
dev_ctx
,
ins
,
&
outs
,
func
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录