Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
2b879a69
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2b879a69
编写于
9月 30, 2022
作者:
六
六个骨头
提交者:
GitHub
9月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
【Hackathon No.21】为 Paddle 新增 paddle.incubate.sparse.transpose 稀疏 API (#45849)
上级
4b9dae01
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
1091 addition
and
3 deletion
+1091
-3
paddle/phi/api/yaml/sparse_backward.yaml
paddle/phi/api/yaml/sparse_backward.yaml
+11
-0
paddle/phi/api/yaml/sparse_ops.yaml
paddle/phi/api/yaml/sparse_ops.yaml
+12
-0
paddle/phi/core/sparse_coo_tensor.h
paddle/phi/core/sparse_coo_tensor.h
+1
-1
paddle/phi/core/sparse_csr_tensor.h
paddle/phi/core/sparse_csr_tensor.h
+2
-2
paddle/phi/infermeta/sparse/unary.h
paddle/phi/infermeta/sparse/unary.h
+7
-0
paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc
paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc
+78
-0
paddle/phi/kernels/sparse/cpu/transpose_kernel.cc
paddle/phi/kernels/sparse/cpu/transpose_kernel.cc
+231
-0
paddle/phi/kernels/sparse/gpu/transpose_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/transpose_grad_kernel.cu
+80
-0
paddle/phi/kernels/sparse/gpu/transpose_kernel.cu
paddle/phi/kernels/sparse/gpu/transpose_kernel.cu
+338
-0
paddle/phi/kernels/sparse/unary_grad_kernel.h
paddle/phi/kernels/sparse/unary_grad_kernel.h
+12
-0
paddle/phi/kernels/sparse/unary_kernel.h
paddle/phi/kernels/sparse/unary_kernel.h
+42
-0
paddle/phi/tests/kernels/CMakeLists.txt
paddle/phi/tests/kernels/CMakeLists.txt
+4
-0
paddle/phi/tests/kernels/test_sparse_transpose_dev_api.cc
paddle/phi/tests/kernels/test_sparse_transpose_dev_api.cc
+165
-0
python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py
.../paddle/fluid/tests/unittests/test_sparse_transpose_op.py
+75
-0
python/paddle/incubate/sparse/__init__.py
python/paddle/incubate/sparse/__init__.py
+2
-0
python/paddle/incubate/sparse/unary.py
python/paddle/incubate/sparse/unary.py
+31
-0
未找到文件。
paddle/phi/api/yaml/sparse_backward.yaml
浏览文件 @
2b879a69
...
...
@@ -385,6 +385,17 @@
kernel
:
func
:
coo_to_dense { sparse_coo -> dense }
-
backward_op
:
transpose_grad
forward
:
transpose(Tensor x, int[] perm) -> Tensor(out)
args
:
(Tensor out_grad, int[] perm)
output
:
Tensor(x_grad)
infer_meta
:
func
:
TransposeGradInferMeta
param
:
[
out_grad
,
perm
]
kernel
:
func
:
transpose_coo_grad {sparse_coo -> sparse_coo},
transpose_csr_grad {sparse_csr -> sparse_csr}
-
backward_op
:
values_grad
forward
:
values_coo(Tensor x) -> Tensor(out)
args
:
(Tensor x, Tensor out_grad)
...
...
paddle/phi/api/yaml/sparse_ops.yaml
浏览文件 @
2b879a69
...
...
@@ -457,3 +457,15 @@
mv_csr{sparse_csr, dense -> dense}
layout
:
x
backward
:
mv_grad
-
op
:
transpose
args
:
(Tensor x, int[] perm)
output
:
Tensor(out)
infer_meta
:
func
:
TransposeInferMeta
param
:
[
x
,
perm
]
kernel
:
func
:
transpose_coo{sparse_coo -> sparse_coo},
transpose_csr{sparse_csr -> sparse_csr}
layout
:
x
backward
:
transpose_grad
paddle/phi/core/sparse_coo_tensor.h
浏览文件 @
2b879a69
...
...
@@ -274,7 +274,7 @@ class SparseCooTensor : public TensorBase,
[0, 0, 0, 0]]
dims_ = (4, 4)
non_zero_elements_ = [[0, 1, 0, 0], [0, 0, 4, 0]]
non_zero_indices_ = [
0, 2],
non_zero_indices_ = [
[0, 2], [1, 2]]
*/
};
...
...
paddle/phi/core/sparse_csr_tensor.h
浏览文件 @
2b879a69
...
...
@@ -209,7 +209,7 @@ class SparseCsrTensor : public TensorBase,
[0, 0, 4, 0],
[0, 5, 0, 6]]
dims_ = (4, 4)
non_zero_elements_ = [1, 2, 3, 4, 5
,
6]
non_zero_elements_ = [1, 2, 3, 4, 5
,
6]
non_zero_crows_ = [0, 1, 3, 4, 6]
non_zero_cols_ = [1, 0, 3, 2, 1, 3]
*/
...
...
@@ -228,7 +228,7 @@ class SparseCsrTensor : public TensorBase,
[0, 0, 4, 0],
[0, 5, 0, 0]]]
dims_ = (2, 4, 4)
non_zero_elements_ = [1, 2, 3, 4, 5
,
6, 1, 2, 3, 4, 5]
non_zero_elements_ = [1, 2, 3, 4, 5
,
6, 1, 2, 3, 4, 5]
non_zero_crows_ = [0, 1, 3, 4, 6, 0, 1, 2, 4, 5]
non_zero_cols_ = [1, 0, 3, 2, 1, 3, 1, 0, 3, 2, 1]
*/
...
...
paddle/phi/infermeta/sparse/unary.h
浏览文件 @
2b879a69
...
...
@@ -24,5 +24,12 @@ void IndicesInferMeta(const MetaTensor& x, MetaTensor* out);
void
ValuesInferMeta
(
const
MetaTensor
&
x
,
MetaTensor
*
out
);
void
TransposeInferMeta
(
const
MetaTensor
&
x
,
const
std
::
vector
<
int
>&
axis
,
MetaTensor
*
out
);
void
TransposeGradInferMeta
(
const
MetaTensor
&
x
,
const
std
::
vector
<
int
>&
axis
,
MetaTensor
*
out
);
}
// namespace sparse
}
// namespace phi
paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc
0 → 100644
浏览文件 @
2b879a69
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h"
namespace
phi
{
namespace
sparse
{
std
::
vector
<
int
>
get_cpu_grad_perm
(
std
::
vector
<
int
>
perm
)
{
std
::
vector
<
int
>
grad_perm
(
perm
.
size
());
for
(
unsigned
int
i
=
0
;
i
<
perm
.
size
();
++
i
)
{
grad_perm
[
perm
[
i
]]
=
i
;
}
return
grad_perm
;
}
template
<
typename
T
,
typename
Context
>
void
TransposeCooGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCooTensor
*
dx
)
{
std
::
vector
<
int
>
grad_perm
=
get_cpu_grad_perm
(
perm
);
TransposeCooKernel
<
T
,
Context
>
(
dev_ctx
,
dout
,
grad_perm
,
dx
);
}
template
<
typename
T
,
typename
Context
>
void
TransposeCsrGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCsrTensor
*
dx
)
{
std
::
vector
<
int
>
grad_perm
=
get_cpu_grad_perm
(
perm
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
dout
,
grad_perm
,
dx
);
}
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
transpose_coo_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCooGradKernel
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
PD_REGISTER_KERNEL
(
transpose_csr_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCsrGradKernel
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
paddle/phi/kernels/sparse/cpu/transpose_kernel.cc
0 → 100644
浏览文件 @
2b879a69
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
phi
{
namespace
sparse
{
template
<
typename
T
,
typename
Context
>
void
TransposeCooKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
std
::
vector
<
int
>&
perm
,
SparseCooTensor
*
out
)
{
// create out sparse tensor
int64_t
x_nnz
=
x
.
nnz
();
DDim
out_dims
=
x
.
dims
().
transpose
(
perm
);
DenseTensor
out_indices
=
EmptyLike
<
int64_t
,
Context
>
(
dev_ctx
,
x
.
indices
());
DenseTensor
out_values
(
x
.
values
());
out
->
SetMember
(
out_indices
,
out_values
,
out_dims
,
x
.
coalesced
());
// compute values of indices
const
DenseTensor
&
x_indices
=
x
.
indices
();
const
auto
*
x_indices_data
=
x_indices
.
data
<
int64_t
>
();
auto
*
out_indices_data
=
out_indices
.
data
<
int64_t
>
();
for
(
unsigned
int
i
=
0
;
i
<
perm
.
size
();
++
i
)
{
for
(
int64_t
j
=
0
;
j
<
x_nnz
;
++
j
)
{
out_indices_data
[
j
+
i
*
x_nnz
]
=
x_indices_data
[
j
+
perm
[
i
]
*
x_nnz
];
}
}
}
template
<
typename
T
,
typename
Context
>
void
TransposeCsrKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
std
::
vector
<
int
>&
perm
,
SparseCsrTensor
*
out
)
{
unsigned
int
n_dim
=
perm
.
size
();
const
DenseTensor
&
x_crows
=
x
.
crows
();
const
DenseTensor
&
x_cols
=
x
.
cols
();
const
DenseTensor
&
x_values
=
x
.
values
();
DenseTensor
out_crows
,
out_cols
,
out_values
;
// return a copy of x
if
(
perm
[
0
]
==
0
&&
perm
[
1
]
==
1
&&
(
n_dim
==
2
||
perm
[
2
]
==
2
))
{
out_crows
=
x_crows
;
out_cols
=
x_cols
;
out_values
=
x_values
;
out
->
SetMember
(
out_crows
,
out_cols
,
out_values
,
x
.
dims
());
return
;
}
// create out sparse tensor
DDim
out_dims
=
x
.
dims
().
transpose
(
perm
);
if
(
n_dim
==
2
)
{
out_crows
=
Empty
<
int64_t
,
Context
>
(
dev_ctx
,
{
out_dims
[
0
]
+
1
});
}
else
{
out_crows
=
Empty
<
int64_t
,
Context
>
(
dev_ctx
,
{
out_dims
[
0
]
*
(
out_dims
[
1
]
+
1
)});
}
out_cols
=
EmptyLike
<
int64_t
,
Context
>
(
dev_ctx
,
x
.
cols
());
out_values
=
EmptyLike
<
T
,
Context
>
(
dev_ctx
,
x
.
values
());
out
->
SetMember
(
out_crows
,
out_cols
,
out_values
,
out_dims
);
// transpose by two stages
if
(
perm
[
0
]
==
1
&&
perm
[
1
]
==
2
)
{
// perm == {1, 2, 0}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
1
,
0
,
2
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
0
,
2
,
1
},
out
);
return
;
}
else
if
(
perm
[
0
]
==
2
&&
perm
[
1
]
==
0
)
{
// perm == {2, 0, 1}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
0
,
2
,
1
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
1
,
0
,
2
},
out
);
return
;
}
else
if
(
perm
[
0
]
==
2
&&
perm
[
1
]
==
1
)
{
// perm == {2, 1, 0}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
1
,
0
,
2
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
2
,
0
,
1
},
out
);
return
;
}
int64_t
*
out_crows_data
=
out_crows
.
data
<
int64_t
>
();
int64_t
*
out_cols_data
=
out_cols
.
data
<
int64_t
>
();
T
*
out_values_data
=
out_values
.
data
<
T
>
();
const
int64_t
*
x_crows_data
=
x_crows
.
data
<
int64_t
>
();
const
int64_t
*
x_cols_data
=
x_cols
.
data
<
int64_t
>
();
const
T
*
x_values_data
=
x_values
.
data
<
T
>
();
int64_t
x_nnz
=
x
.
nnz
();
if
(
n_dim
==
2
)
{
// perm == {1, 0}
// compute out_crows_data by x_cols_data
for
(
int
i
=
0
;
i
<
out_dims
[
0
];
++
i
)
{
out_crows_data
[
i
]
=
0
;
}
for
(
int
i
=
0
;
i
<
x_nnz
;
++
i
)
{
int
j
=
x_cols_data
[
i
];
out_crows_data
[
j
+
1
]
++
;
}
out_crows_data
[
out_dims
[
0
]]
=
x_nnz
;
for
(
int
i
=
1
;
i
<
out_dims
[
0
];
++
i
)
{
out_crows_data
[
i
]
+=
out_crows_data
[
i
-
1
];
}
// compute out_cols_data and out_values_data by out_crows_data and x
std
::
unordered_map
<
int64_t
,
int
>
cols_offset
;
for
(
int
i
=
0
;
i
<
x
.
dims
()[
0
];
++
i
)
{
int64_t
start
=
x_crows_data
[
i
];
int64_t
end
=
x_crows_data
[
i
+
1
];
for
(
int64_t
j
=
start
;
j
<
end
;
++
j
)
{
int64_t
x_cols_j
=
x_cols_data
[
j
];
int64_t
jjj
=
out_crows_data
[
x_cols_j
];
if
(
cols_offset
.
count
(
jjj
))
{
cols_offset
[
jjj
]
++
;
}
else
{
cols_offset
[
jjj
]
=
0
;
}
int64_t
jjj_offset
=
jjj
+
cols_offset
[
jjj
];
out_cols_data
[
jjj_offset
]
=
i
;
out_values_data
[
jjj_offset
]
=
x_values_data
[
j
];
}
}
}
else
{
// n_dim == 3
int
out_n_rows
=
out_dims
[
1
];
int
x_n_rows
=
x
.
dims
()[
1
];
for
(
int
k
=
0
;
k
<
out_dims
[
0
];
++
k
)
{
if
(
perm
[
0
]
==
0
)
{
// perm == {0, 2, 1}
// compute out_crows_data by x_cols_data
for
(
int
i
=
0
;
i
<
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
=
0
;
}
for
(
int
i
=
0
;
i
<
x_crows_data
[
x_n_rows
];
++
i
)
{
int
j
=
x_cols_data
[
i
];
out_crows_data
[
j
+
1
]
++
;
}
out_crows_data
[
out_n_rows
]
=
x_crows_data
[
x_n_rows
];
for
(
int
i
=
1
;
i
<
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
+=
out_crows_data
[
i
-
1
];
}
// compute out_cols_data and out_values_data by out_crows_data and x
std
::
unordered_map
<
int64_t
,
int
>
cols_offset
;
for
(
int
i
=
0
;
i
<
x_n_rows
;
++
i
)
{
int64_t
start
=
x_crows_data
[
i
];
int64_t
end
=
x_crows_data
[
i
+
1
];
for
(
int64_t
j
=
start
;
j
<
end
;
++
j
)
{
int64_t
x_cols_j
=
x_cols_data
[
j
];
int64_t
jjj
=
out_crows_data
[
x_cols_j
];
if
(
cols_offset
.
count
(
jjj
))
{
cols_offset
[
jjj
]
++
;
}
else
{
cols_offset
[
jjj
]
=
0
;
}
int64_t
jjj_offset
=
jjj
+
cols_offset
[
jjj
];
out_cols_data
[
jjj_offset
]
=
i
;
out_values_data
[
jjj_offset
]
=
x_values_data
[
j
];
}
}
// x offset
x_cols_data
+=
x_crows_data
[
x_n_rows
];
x_values_data
+=
x_crows_data
[
x_n_rows
];
x_crows_data
+=
x_n_rows
+
1
;
}
else
if
(
perm
[
0
]
==
1
&&
perm
[
1
]
==
0
)
{
// perm == {1, 0, 2}
for
(
int
i
=
0
;
i
<
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
=
0
;
}
int
x_cols_offset
=
0
;
int
out_cols_index
=
0
;
for
(
int
i
=
0
;
i
<
x
.
dims
()[
0
];
++
i
)
{
int
x_crows_index
=
i
*
(
x_n_rows
+
1
);
int
start
=
x_crows_data
[
x_crows_index
+
k
];
int
end
=
x_crows_data
[
x_crows_index
+
1
+
k
];
out_crows_data
[
i
+
1
]
=
end
-
start
;
for
(
int
j
=
start
;
j
<
end
;
++
j
)
{
out_cols_data
[
out_cols_index
]
=
x_cols_data
[
x_cols_offset
+
j
];
out_values_data
[
out_cols_index
]
=
x_values_data
[
x_cols_offset
+
j
];
out_cols_index
++
;
}
x_cols_offset
+=
x_crows_data
[
x_crows_index
+
x_n_rows
];
}
for
(
int
i
=
1
;
i
<=
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
+=
out_crows_data
[
i
-
1
];
}
}
// out offset
out_cols_data
+=
out_crows_data
[
out_n_rows
];
out_values_data
+=
out_crows_data
[
out_n_rows
];
out_crows_data
+=
out_n_rows
+
1
;
}
}
}
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
transpose_coo
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCooKernel
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
PD_REGISTER_KERNEL
(
transpose_csr
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCsrKernel
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
paddle/phi/kernels/sparse/gpu/transpose_grad_kernel.cu
0 → 100644
浏览文件 @
2b879a69
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h"
namespace
phi
{
namespace
sparse
{
std
::
vector
<
int
>
get_gpu_grad_perm
(
std
::
vector
<
int
>
perm
)
{
std
::
vector
<
int
>
grad_perm
(
perm
.
size
());
for
(
unsigned
int
i
=
0
;
i
<
perm
.
size
();
++
i
)
{
grad_perm
[
perm
[
i
]]
=
i
;
}
return
grad_perm
;
}
template
<
typename
T
,
typename
Context
>
void
TransposeCooGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCooTensor
*
dx
)
{
std
::
vector
<
int
>
grad_perm
=
get_gpu_grad_perm
(
perm
);
TransposeCooKernel
<
T
,
Context
>
(
dev_ctx
,
dout
,
grad_perm
,
dx
);
}
template
<
typename
T
,
typename
Context
>
void
TransposeCsrGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCsrTensor
*
dx
)
{
std
::
vector
<
int
>
grad_perm
=
get_gpu_grad_perm
(
perm
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
dout
,
grad_perm
,
dx
);
}
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
transpose_coo_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCooGradKernel
,
phi
::
dtype
::
float16
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
PD_REGISTER_KERNEL
(
transpose_csr_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCsrGradKernel
,
phi
::
dtype
::
float16
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
paddle/phi/kernels/sparse/gpu/transpose_kernel.cu
0 → 100644
浏览文件 @
2b879a69
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
phi
{
namespace
sparse
{
__global__
void
TransposeCooCudaKernel
(
const
int64_t
*
x_indices_data
,
const
int
*
perm
,
const
std
::
size_t
n_dim
,
const
int64_t
x_nnz
,
int64_t
*
out_indices_data
)
{
CUDA_KERNEL_LOOP_TYPE
(
index
,
x_nnz
*
n_dim
,
int64_t
)
{
int64_t
i
=
index
/
x_nnz
;
int64_t
j
=
index
%
x_nnz
;
out_indices_data
[
index
]
=
x_indices_data
[
j
+
perm
[
i
]
*
x_nnz
];
}
}
template
<
typename
T
>
__global__
void
TransposeCsr2DCudaKernel
(
const
int64_t
*
x_crows_data
,
const
int64_t
*
x_cols_data
,
const
T
*
x_values_data
,
const
int
*
perm
,
const
int64_t
*
x_dims
,
const
int64_t
*
out_dims
,
const
int64_t
x_nnz
,
int64_t
*
out_crows_data
,
int64_t
*
out_cols_data
,
T
*
out_values_data
)
{
int64_t
__index__
=
static_cast
<
int64_t
>
(
blockIdx
.
x
)
*
blockDim
.
x
+
threadIdx
.
x
;
// compute out_crows_data by x_cols_data
for
(
int64_t
i
=
__index__
;
i
<=
out_dims
[
0
];
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
out_crows_data
[
i
]
=
0
;
}
__syncthreads
();
if
(
__index__
==
0
)
{
for
(
int64_t
i
=
0
;
i
<
x_nnz
;
++
i
)
{
int
j
=
x_cols_data
[
i
];
out_crows_data
[
j
+
2
]
++
;
}
for
(
int64_t
i
=
0
;
i
<
out_dims
[
0
];
i
+=
1
)
{
out_crows_data
[
i
+
1
]
+=
out_crows_data
[
i
];
}
// compute out_cols_data and out_values_data by out_crows_data and x
for
(
int
i
=
0
;
i
<
x_dims
[
0
];
++
i
)
{
int64_t
start
=
x_crows_data
[
i
];
int64_t
end
=
x_crows_data
[
i
+
1
];
for
(
int64_t
j
=
start
;
j
<
end
;
++
j
)
{
int64_t
x_cols_j
=
x_cols_data
[
j
]
+
1
;
int64_t
jjj
=
out_crows_data
[
x_cols_j
];
out_cols_data
[
jjj
]
=
i
;
out_values_data
[
jjj
]
=
x_values_data
[
j
];
out_crows_data
[
x_cols_j
]
++
;
}
}
}
}
template
<
typename
T
>
__global__
void
TransposeCsr3DCudaKernel
(
const
int64_t
*
x_crows_data
,
const
int64_t
*
x_cols_data
,
const
T
*
x_values_data
,
const
int
*
perm
,
const
int64_t
*
x_dims
,
const
int64_t
*
out_dims
,
const
std
::
size_t
n_dim
,
const
int64_t
x_nnz
,
int64_t
*
out_crows_data
,
int64_t
*
out_cols_data
,
T
*
out_values_data
)
{
int64_t
__index__
=
static_cast
<
int64_t
>
(
blockIdx
.
x
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
__index__
==
0
)
{
int
out_n_rows
=
out_dims
[
1
];
int
x_n_rows
=
x_dims
[
1
];
for
(
int
k
=
0
;
k
<
out_dims
[
0
];
++
k
)
{
if
(
perm
[
0
]
==
0
)
{
// dims == {0, 2, 1}
// compute out_crows_data by x_cols_data
for
(
int
i
=
0
;
i
<=
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
=
0
;
}
for
(
int
i
=
0
;
i
<
x_crows_data
[
x_n_rows
];
++
i
)
{
int
j
=
x_cols_data
[
i
];
out_crows_data
[
j
+
2
]
++
;
}
for
(
int
i
=
0
;
i
<
out_n_rows
;
++
i
)
{
out_crows_data
[
i
+
1
]
+=
out_crows_data
[
i
];
}
// compute out_cols_data and out_values_data by out_crows_data and x
for
(
int
i
=
0
;
i
<
x_n_rows
;
++
i
)
{
int64_t
start
=
x_crows_data
[
i
];
int64_t
end
=
x_crows_data
[
i
+
1
];
for
(
int64_t
j
=
start
;
j
<
end
;
++
j
)
{
int64_t
x_cols_j
=
x_cols_data
[
j
]
+
1
;
int64_t
jjj
=
out_crows_data
[
x_cols_j
];
out_cols_data
[
jjj
]
=
i
;
out_values_data
[
jjj
]
=
x_values_data
[
j
];
out_crows_data
[
x_cols_j
]
++
;
}
}
// x offset
x_cols_data
+=
x_crows_data
[
x_n_rows
];
x_values_data
+=
x_crows_data
[
x_n_rows
];
x_crows_data
+=
x_n_rows
+
1
;
}
else
if
(
perm
[
0
]
==
1
&&
perm
[
1
]
==
0
)
{
// perm == {1, 0, 2}
for
(
int
i
=
0
;
i
<
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
=
0
;
}
int
x_cols_offset
=
0
;
int
out_cols_index
=
0
;
for
(
int
i
=
0
;
i
<
x_dims
[
0
];
++
i
)
{
int
x_crows_index
=
i
*
(
x_n_rows
+
1
);
int
start
=
x_crows_data
[
x_crows_index
+
k
];
int
end
=
x_crows_data
[
x_crows_index
+
1
+
k
];
out_crows_data
[
i
+
1
]
=
end
-
start
;
for
(
int
j
=
start
;
j
<
end
;
++
j
)
{
out_cols_data
[
out_cols_index
]
=
x_cols_data
[
x_cols_offset
+
j
];
out_values_data
[
out_cols_index
]
=
x_values_data
[
x_cols_offset
+
j
];
out_cols_index
++
;
}
x_cols_offset
+=
x_crows_data
[
x_crows_index
+
x_n_rows
];
}
for
(
int
i
=
1
;
i
<=
out_n_rows
;
++
i
)
{
out_crows_data
[
i
]
+=
out_crows_data
[
i
-
1
];
}
}
// out offset
out_cols_data
+=
out_crows_data
[
out_n_rows
];
out_values_data
+=
out_crows_data
[
out_n_rows
];
out_crows_data
+=
out_n_rows
+
1
;
}
}
}
template
<
typename
T
,
typename
Context
>
void
TransposeCooKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
std
::
vector
<
int
>
&
perm
,
SparseCooTensor
*
out
)
{
// create out sparse tensor
int64_t
x_nnz
=
x
.
nnz
();
std
::
size_t
n_dim
=
perm
.
size
();
DDim
out_dims
=
x
.
dims
().
transpose
(
perm
);
DenseTensor
out_indices
=
EmptyLike
<
int64_t
,
Context
>
(
dev_ctx
,
x
.
indices
());
DenseTensor
out_values
(
x
.
values
());
out
->
SetMember
(
out_indices
,
out_values
,
out_dims
,
x
.
coalesced
());
// compute values of indices
const
DenseTensor
&
x_indices
=
x
.
indices
();
const
auto
*
x_indices_data
=
x_indices
.
data
<
int64_t
>
();
auto
*
out_indices_data
=
out_indices
.
data
<
int64_t
>
();
int
*
d_perm
;
#ifdef PADDLE_WITH_HIP
hipMalloc
(
reinterpret_cast
<
void
**>
(
&
d_perm
),
sizeof
(
int
)
*
perm
.
size
());
hipMemcpy
(
d_perm
,
perm
.
data
(),
sizeof
(
int
)
*
perm
.
size
(),
hipMemcpyHostToDevice
);
#else
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_perm
),
sizeof
(
int
)
*
perm
.
size
());
cudaMemcpy
(
d_perm
,
perm
.
data
(),
sizeof
(
int
)
*
perm
.
size
(),
cudaMemcpyHostToDevice
);
#endif
auto
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
x_nnz
*
n_dim
,
1
);
TransposeCooCudaKernel
<<<
config
.
block_per_grid
.
x
,
config
.
thread_per_block
.
x
,
0
,
dev_ctx
.
stream
()
>>>
(
x_indices_data
,
d_perm
,
n_dim
,
x_nnz
,
out_indices_data
);
}
template
<
typename
T
,
typename
Context
>
void
TransposeCsrKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
std
::
vector
<
int
>
&
perm
,
SparseCsrTensor
*
out
)
{
std
::
size_t
n_dim
=
perm
.
size
();
const
DenseTensor
&
x_crows
=
x
.
crows
();
const
DenseTensor
&
x_cols
=
x
.
cols
();
const
DenseTensor
&
x_values
=
x
.
non_zero_elements
();
DenseTensor
out_crows
,
out_cols
,
out_values
;
// return a copy of x
if
(
perm
[
0
]
==
0
&&
perm
[
1
]
==
1
&&
(
n_dim
==
2
||
perm
[
2
]
==
2
))
{
out_crows
=
x_crows
;
out_cols
=
x_cols
;
out_values
=
x_values
;
out
->
SetMember
(
out_crows
,
out_cols
,
out_values
,
x
.
dims
());
return
;
}
// create out sparse tensor
DDim
out_dims
=
x
.
dims
().
transpose
(
perm
);
if
(
n_dim
==
2
)
{
out_crows
=
Empty
<
int64_t
,
Context
>
(
dev_ctx
,
{
out_dims
[
0
]
+
1
});
}
else
{
out_crows
=
Empty
<
int64_t
,
Context
>
(
dev_ctx
,
{
out_dims
[
0
]
*
(
out_dims
[
1
]
+
1
)});
}
out_cols
=
EmptyLike
<
int64_t
,
Context
>
(
dev_ctx
,
x
.
cols
());
out_values
=
EmptyLike
<
T
,
Context
>
(
dev_ctx
,
x
.
values
());
out
->
SetMember
(
out_crows
,
out_cols
,
out_values
,
out_dims
);
// transpose by two stages
if
(
perm
[
0
]
==
1
&&
perm
[
1
]
==
2
)
{
// perm == {1, 2, 0}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
1
,
0
,
2
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
0
,
2
,
1
},
out
);
return
;
}
else
if
(
perm
[
0
]
==
2
&&
perm
[
1
]
==
0
)
{
// perm == {2, 0, 1}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
0
,
2
,
1
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
1
,
0
,
2
},
out
);
return
;
}
else
if
(
perm
[
0
]
==
2
&&
perm
[
1
]
==
1
)
{
// perm == {2, 1, 0}
SparseCsrTensor
temp
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
{
1
,
0
,
2
},
&
temp
);
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
temp
,
{
2
,
0
,
1
},
out
);
return
;
}
int64_t
*
out_crows_data
=
out_crows
.
data
<
int64_t
>
();
int64_t
*
out_cols_data
=
out_cols
.
data
<
int64_t
>
();
T
*
out_values_data
=
out_values
.
data
<
T
>
();
const
int64_t
*
x_crows_data
=
x_crows
.
data
<
int64_t
>
();
const
int64_t
*
x_cols_data
=
x_cols
.
data
<
int64_t
>
();
const
T
*
x_values_data
=
x_values
.
data
<
T
>
();
int
*
d_perm
;
int64_t
*
d_x_dims
,
*
d_out_dims
;
#ifdef PADDLE_WITH_HIP
hipMalloc
(
reinterpret_cast
<
void
**>
(
&
d_perm
),
sizeof
(
int
)
*
perm
.
size
());
hipMemcpy
(
d_perm
,
perm
.
data
(),
sizeof
(
int
)
*
perm
.
size
(),
hipMemcpyHostToDevice
);
hipMalloc
(
reinterpret_cast
<
void
**>
(
&
d_x_dims
),
sizeof
(
int64_t
)
*
x
.
dims
().
size
());
hipMemcpy
(
d_x_dims
,
x
.
dims
().
Get
(),
sizeof
(
int64_t
)
*
x
.
dims
().
size
(),
hipMemcpyHostToDevice
);
hipMalloc
(
reinterpret_cast
<
void
**>
(
&
d_out_dims
),
sizeof
(
int64_t
)
*
out_dims
.
size
());
hipMemcpy
(
d_out_dims
,
out_dims
.
Get
(),
sizeof
(
int64_t
)
*
out_dims
.
size
(),
hipMemcpyHostToDevice
);
#else
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_perm
),
sizeof
(
int
)
*
perm
.
size
());
cudaMemcpy
(
d_perm
,
perm
.
data
(),
sizeof
(
int
)
*
perm
.
size
(),
cudaMemcpyHostToDevice
);
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_x_dims
),
sizeof
(
int64_t
)
*
x
.
dims
().
size
());
cudaMemcpy
(
d_x_dims
,
x
.
dims
().
Get
(),
sizeof
(
int64_t
)
*
x
.
dims
().
size
(),
cudaMemcpyHostToDevice
);
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_out_dims
),
sizeof
(
int64_t
)
*
out_dims
.
size
());
cudaMemcpy
(
d_out_dims
,
out_dims
.
Get
(),
sizeof
(
int64_t
)
*
out_dims
.
size
(),
cudaMemcpyHostToDevice
);
#endif
int64_t
x_nnz
=
x
.
nnz
();
auto
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
out_dims
[
0
],
1
);
if
(
perm
.
size
()
==
2
)
{
TransposeCsr2DCudaKernel
<
T
><<<
config
.
block_per_grid
.
x
,
config
.
thread_per_block
.
x
,
0
,
dev_ctx
.
stream
()
>>>
(
x_crows_data
,
x_cols_data
,
x_values_data
,
d_perm
,
d_x_dims
,
d_out_dims
,
x_nnz
,
out_crows_data
,
out_cols_data
,
out_values_data
);
}
else
{
TransposeCsr3DCudaKernel
<
T
><<<
1
,
1
,
0
,
dev_ctx
.
stream
()
>>>
(
x_crows_data
,
x_cols_data
,
x_values_data
,
d_perm
,
d_x_dims
,
d_out_dims
,
perm
.
size
(),
x_nnz
,
out_crows_data
,
out_cols_data
,
out_values_data
);
}
}
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
transpose_coo
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCooKernel
,
phi
::
dtype
::
float16
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
PD_REGISTER_KERNEL
(
transpose_csr
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
TransposeCsrKernel
,
phi
::
dtype
::
float16
,
float
,
double
,
int8_t
,
uint8_t
,
int16_t
,
int
,
int64_t
,
bool
)
{}
paddle/phi/kernels/sparse/unary_grad_kernel.h
浏览文件 @
2b879a69
...
...
@@ -77,5 +77,17 @@ void CastCsrGradKernel(const Context& dev_ctx,
DataType
value_dtype
,
SparseCsrTensor
*
dx
);
template
<
typename
T
,
typename
Context
>
void
TransposeCooGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCooTensor
*
dx
);
template
<
typename
T
,
typename
Context
>
void
TransposeCsrGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
dout
,
const
std
::
vector
<
int
>&
perm
,
SparseCsrTensor
*
dx
);
}
// namespace sparse
}
// namespace phi
paddle/phi/kernels/sparse/unary_kernel.h
浏览文件 @
2b879a69
...
...
@@ -99,6 +99,48 @@ void CastCsrKernel(const Context& dev_ctx,
DataType
value_dtype
,
SparseCsrTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
TransposeCooKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
std
::
vector
<
int
>&
perm
,
SparseCooTensor
*
out
);
template
<
typename
T
,
typename
Context
>
void
TransposeCsrKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
std
::
vector
<
int
>&
perm
,
SparseCsrTensor
*
out
);
template
<
typename
T
,
typename
Context
>
SparseCooTensor
TransposeCoo
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
std
::
vector
<
int
>&
perm
)
{
PADDLE_ENFORCE_EQ
(
x
.
sparse_dim
(),
perm
.
size
(),
phi
::
errors
::
InvalidArgument
(
"size of perm must be equal than the x.sparse_dim()"
));
SparseCooTensor
coo
;
TransposeCooKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
perm
,
&
coo
);
return
coo
;
}
template
<
typename
T
,
typename
Context
>
SparseCsrTensor
TransposeCsr
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
std
::
vector
<
int
>&
perm
)
{
PADDLE_ENFORCE_LE
(
2
,
perm
.
size
(),
phi
::
errors
::
InvalidArgument
(
"size of perm must be equal to 2 or 3"
));
PADDLE_ENFORCE_GE
(
3
,
perm
.
size
(),
phi
::
errors
::
InvalidArgument
(
"size of perm must be equal to 2 or 3"
));
SparseCsrTensor
csr
;
TransposeCsrKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
perm
,
&
csr
);
return
csr
;
}
template
<
typename
T
,
typename
Context
>
SparseCooTensor
ReluCoo
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
)
{
SparseCooTensor
coo
;
...
...
paddle/phi/tests/kernels/CMakeLists.txt
浏览文件 @
2b879a69
...
...
@@ -74,6 +74,10 @@ cc_test(
test_sparse_elementwise_dev_api
SRCS test_sparse_elementwise_dev_api.cc
DEPS phi phi_api_utils
)
cc_test
(
test_sparse_transpose_dev_api
SRCS test_sparse_transpose_dev_api.cc
DEPS phi phi_api_utils
)
cc_test
(
test_math_function
...
...
paddle/phi/tests/kernels/test_sparse_transpose_dev_api.cc
0 → 100644
浏览文件 @
2b879a69
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/kernels/transpose_grad_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
namespace
phi
{
namespace
tests
{
TEST
(
DEV_API
,
sparse_transpose_coo
)
{
std
::
vector
<
float
>
data
=
{
0
,
-
1
,
0
,
2
,
0
,
0
,
-
3
,
0
,
4
,
5
,
0
,
0
};
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
dev_ctx_cpu
.
SetHostAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
DenseTensor
dense_x
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
3
,
2
,
2
}),
DataLayout
::
NCHW
));
memcpy
(
dense_x
.
data
<
float
>
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
float
));
auto
sparse_coo
=
sparse
::
DenseToCoo
<
float
>
(
dev_ctx_cpu
,
dense_x
,
3
);
auto
sparse_out
=
sparse
::
TransposeCoo
<
float
>
(
dev_ctx_cpu
,
sparse_coo
,
{
2
,
1
,
0
});
DenseTensor
dense_out
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
2
,
2
,
3
}),
DataLayout
::
NCHW
));
TransposeKernel
<
float
>
(
dev_ctx_cpu
,
dense_x
,
{
2
,
1
,
0
},
&
dense_out
);
// backward
DenseTensor
dense_grad_x
=
phi
::
EmptyLike
<
float
>
(
dev_ctx_cpu
,
dense_out
);
TransposeGradKernel
<
float
>
(
dev_ctx_cpu
,
dense_out
,
{
2
,
1
,
0
},
&
dense_grad_x
);
SparseCooTensor
sparse_grad_x
;
sparse
::
EmptyLikeCooKernel
<
float
>
(
dev_ctx_cpu
,
sparse_coo
,
&
sparse_grad_x
);
SparseCooTensor
sparse_out_grad
(
sparse_coo
.
indices
(),
sparse_coo
.
values
(),
{
2
,
2
,
3
});
sparse
::
TransposeCooGradKernel
<
float
>
(
dev_ctx_cpu
,
sparse_out_grad
,
{
2
,
1
,
0
},
&
sparse_grad_x
);
}
TEST
(
DEV_API
,
sparse_transpose_csr_case1
)
{
std
::
vector
<
float
>
data
=
{
0
,
-
1
,
0
,
2
,
0
,
0
,
-
3
,
0
,
4
,
5
,
0
,
0
};
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
dev_ctx_cpu
.
SetHostAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
DenseTensor
dense_x
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
3
,
2
,
2
}),
DataLayout
::
NCHW
));
memcpy
(
dense_x
.
data
<
float
>
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
float
));
auto
sparse_csr
=
sparse
::
DenseToCsr
<
float
>
(
dev_ctx_cpu
,
dense_x
);
auto
sparse_out
=
sparse
::
TransposeCsr
<
float
>
(
dev_ctx_cpu
,
sparse_csr
,
{
2
,
1
,
0
});
DenseTensor
dense_out
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
2
,
2
,
3
}),
DataLayout
::
NCHW
));
TransposeKernel
<
float
>
(
dev_ctx_cpu
,
dense_x
,
{
2
,
1
,
0
},
&
dense_out
);
// backward
DenseTensor
dense_grad_x
=
phi
::
EmptyLike
<
float
>
(
dev_ctx_cpu
,
dense_out
);
TransposeGradKernel
<
float
>
(
dev_ctx_cpu
,
dense_out
,
{
2
,
1
,
0
},
&
dense_grad_x
);
SparseCsrTensor
sparse_grad_x
;
sparse
::
EmptyLikeCsrKernel
<
float
>
(
dev_ctx_cpu
,
sparse_csr
,
&
sparse_grad_x
);
sparse
::
TransposeCsrGradKernel
<
float
>
(
dev_ctx_cpu
,
sparse_out
,
{
2
,
1
,
0
},
&
sparse_grad_x
);
}
TEST
(
DEV_API
,
sparse_transpose_csr_case2
)
{
std
::
vector
<
float
>
data
=
{
0
,
-
1
,
0
,
2
,
0
,
0
,
-
3
,
0
,
4
,
5
,
0
,
0
};
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
dev_ctx_cpu
.
SetHostAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
DenseTensor
dense_x
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
3
,
2
,
2
}),
DataLayout
::
NCHW
));
memcpy
(
dense_x
.
data
<
float
>
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
float
));
auto
sparse_csr
=
sparse
::
DenseToCsr
<
float
>
(
dev_ctx_cpu
,
dense_x
);
auto
sparse_out
=
sparse
::
TransposeCsr
<
float
>
(
dev_ctx_cpu
,
sparse_csr
,
{
1
,
2
,
0
});
DenseTensor
dense_out
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
2
,
2
,
3
}),
DataLayout
::
NCHW
));
TransposeKernel
<
float
>
(
dev_ctx_cpu
,
dense_x
,
{
1
,
2
,
0
},
&
dense_out
);
}
TEST
(
DEV_API
,
sparse_transpose_csr_case3
)
{
std
::
vector
<
float
>
data
=
{
0
,
-
1
,
0
,
2
,
0
,
0
,
-
3
,
0
,
4
,
5
,
0
,
0
};
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
dev_ctx_cpu
.
SetHostAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
DenseTensor
dense_x
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
3
,
4
}),
DataLayout
::
NCHW
));
memcpy
(
dense_x
.
data
<
float
>
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
float
));
auto
sparse_csr
=
sparse
::
DenseToCsr
<
float
>
(
dev_ctx_cpu
,
dense_x
);
auto
sparse_out
=
sparse
::
TransposeCsr
<
float
>
(
dev_ctx_cpu
,
sparse_csr
,
{
1
,
0
});
DenseTensor
dense_out
=
phi
::
Empty
(
dev_ctx_cpu
,
DenseTensorMeta
(
DataType
::
FLOAT32
,
phi
::
make_ddim
({
4
,
3
}),
DataLayout
::
NCHW
));
TransposeKernel
<
float
>
(
dev_ctx_cpu
,
dense_x
,
{
1
,
0
},
&
dense_out
);
}
}
// namespace tests
}
// namespace phi
python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py
0 → 100644
浏览文件 @
2b879a69
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
numpy
as
np
import
unittest
from
paddle.fluid.framework
import
_test_eager_guard
class
TestTranspose
(
unittest
.
TestCase
):
# x: sparse, out: sparse
def
check_result
(
self
,
x_shape
,
dims
,
format
):
with
_test_eager_guard
():
mask
=
paddle
.
randint
(
0
,
2
,
x_shape
).
astype
(
"float32"
)
origin_x
=
paddle
.
rand
(
x_shape
,
dtype
=
'float32'
)
*
mask
dense_x
=
origin_x
.
detach
()
dense_x
.
stop_gradient
=
False
dense_out
=
paddle
.
transpose
(
dense_x
,
dims
)
if
format
==
"coo"
:
sp_x
=
origin_x
.
detach
().
to_sparse_coo
(
len
(
x_shape
))
else
:
sp_x
=
origin_x
.
detach
().
to_sparse_csr
()
sp_x
.
stop_gradient
=
False
sp_out
=
paddle
.
incubate
.
sparse
.
transpose
(
sp_x
,
dims
)
np
.
testing
.
assert_allclose
(
sp_out
.
to_dense
().
numpy
(),
dense_out
.
numpy
(),
rtol
=
1e-05
)
dense_out
.
backward
()
sp_out
.
backward
()
np
.
testing
.
assert_allclose
(
sp_x
.
grad
.
to_dense
().
numpy
(),
(
dense_x
.
grad
*
mask
).
numpy
(),
rtol
=
1e-05
)
def
test_transpose_2d
(
self
):
self
.
check_result
([
2
,
5
],
[
0
,
1
],
'coo'
)
self
.
check_result
([
2
,
5
],
[
0
,
1
],
'csr'
)
self
.
check_result
([
2
,
5
],
[
1
,
0
],
'coo'
)
self
.
check_result
([
2
,
5
],
[
1
,
0
],
'csr'
)
def
test_transpose_3d
(
self
):
self
.
check_result
([
6
,
2
,
3
],
[
0
,
1
,
2
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
0
,
1
,
2
],
'csr'
)
self
.
check_result
([
6
,
2
,
3
],
[
0
,
2
,
1
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
0
,
2
,
1
],
'csr'
)
self
.
check_result
([
6
,
2
,
3
],
[
1
,
0
,
2
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
1
,
0
,
2
],
'csr'
)
self
.
check_result
([
6
,
2
,
3
],
[
2
,
0
,
1
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
2
,
0
,
1
],
'csr'
)
self
.
check_result
([
6
,
2
,
3
],
[
2
,
1
,
0
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
2
,
1
,
0
],
'csr'
)
self
.
check_result
([
6
,
2
,
3
],
[
1
,
2
,
0
],
'coo'
)
self
.
check_result
([
6
,
2
,
3
],
[
1
,
2
,
0
],
'csr'
)
def
test_transpose_nd
(
self
):
self
.
check_result
([
8
,
3
,
4
,
4
,
5
,
3
],
[
5
,
3
,
4
,
1
,
0
,
2
],
'coo'
)
# Randint now only supports access to dimension 0 to 9.
self
.
check_result
([
2
,
3
,
4
,
2
,
3
,
4
,
2
,
3
,
4
],
[
2
,
3
,
4
,
5
,
6
,
7
,
8
,
0
,
1
],
'coo'
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/incubate/sparse/__init__.py
浏览文件 @
2b879a69
...
...
@@ -34,6 +34,7 @@ from .unary import coalesce
from
.unary
import
deg2rad
from
.unary
import
rad2deg
from
.unary
import
expm1
from
.unary
import
transpose
from
.binary
import
mv
from
.binary
import
matmul
...
...
@@ -75,6 +76,7 @@ __all__ = [
'addmm'
,
'add'
,
'subtract'
,
'transpose'
,
'multiply'
,
'divide'
,
'coalesce'
,
...
...
python/paddle/incubate/sparse/unary.py
浏览文件 @
2b879a69
...
...
@@ -119,6 +119,37 @@ def asin(x, name=None):
return
_C_ops
.
sparse_asin
(
x
)
@
dygraph_only
def
transpose
(
x
,
perm
,
name
=
None
):
"""
Changes the perm order of ``x`` without changing its data, requiring x to be a SparseCooTensor or SparseCsrTensor.
.. math::
out = transpose(x, perm)
Parameters:
x (Tensor): The input Sparse Tensor with data type float32, float64.
perm (list|tuple): Permute the input according to the data of perm.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A transposed Sparse Tensor with the same data type as ``x``.
Examples:
.. code-block:: python
import paddle
dense_x = paddle.to_tensor([[-2., 0.], [1., 2.]])
sparse_x = dense_x.to_sparse_coo(1)
out = paddle.incubate.sparse.transpose(sparse_x, [1, 0])
"""
return
_C_ops
.
sparse_transpose
(
x
,
perm
)
@
dygraph_only
def
atan
(
x
,
name
=
None
):
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录