Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ec3e0a13
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ec3e0a13
编写于
6月 20, 2022
作者:
Z
zhangbopd
提交者:
GitHub
6月 20, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cross_op cuda kernel (#43558)
上级
abbd4abb
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
448 addition
and
233 deletion
+448
-233
paddle/fluid/operators/cross_op.cc
paddle/fluid/operators/cross_op.cc
+12
-0
paddle/phi/kernels/cpu/cross_grad_kernel.cc
paddle/phi/kernels/cpu/cross_grad_kernel.cc
+96
-1
paddle/phi/kernels/cpu/cross_kernel.cc
paddle/phi/kernels/cpu/cross_kernel.cc
+89
-1
paddle/phi/kernels/gpu/cross_grad_kernel.cu
paddle/phi/kernels/gpu/cross_grad_kernel.cu
+133
-1
paddle/phi/kernels/gpu/cross_kernel.cu
paddle/phi/kernels/gpu/cross_kernel.cu
+118
-1
paddle/phi/kernels/impl/cross_grad_kernel_impl.h
paddle/phi/kernels/impl/cross_grad_kernel_impl.h
+0
-113
paddle/phi/kernels/impl/cross_kernel_impl.h
paddle/phi/kernels/impl/cross_kernel_impl.h
+0
-116
未找到文件。
paddle/fluid/operators/cross_op.cc
浏览文件 @
ec3e0a13
...
@@ -61,6 +61,18 @@ class CrossGradOp : public framework::OperatorWithKernel {
...
@@ -61,6 +61,18 @@ class CrossGradOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Y"
),
ctx
->
GetInputDim
(
"Y"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Y"
),
ctx
->
GetInputDim
(
"Y"
));
auto
x_dims
=
ctx
->
GetInputsDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputsDim
(
"Y"
);
for
(
size_t
i
=
0
;
i
<
x_dims
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
x_dims
[
i
],
y_dims
[
i
],
phi
::
errors
::
InvalidArgument
(
"The 'shape' of Input(X) should be equal to "
"the 'shape' of Input(Y). But received "
"Input(X).dimensions = [%s], "
"Input(Y).dimensions = [%s]"
,
x_dims
[
i
],
y_dims
[
i
]));
}
}
}
protected:
protected:
...
...
paddle/phi/kernels/cpu/cross_grad_kernel.cc
浏览文件 @
ec3e0a13
...
@@ -14,10 +14,105 @@
...
@@ -14,10 +14,105 @@
#include "paddle/phi/kernels/cross_grad_kernel.h"
#include "paddle/phi/kernels/cross_grad_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/cross_grad_kernel_impl.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
CrossGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
out_grad
,
int
axis
,
DenseTensor
*
x_grad
,
DenseTensor
*
y_grad
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
&
input_out_grad
=
out_grad
;
auto
*
output_x_grad
=
x_grad
;
auto
*
output_y_grad
=
y_grad
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' "
"so that Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
auto
outer_loops
=
1
;
for
(
auto
i
=
0
;
i
<
dim
;
i
++
)
{
outer_loops
*=
input_x_dims
[
i
];
}
auto
slice_size
=
1
;
for
(
auto
i
=
dim
+
1
;
i
<
input_x_dims
.
size
();
i
++
)
{
slice_size
*=
input_x_dims
[
i
];
}
std
::
vector
<
T
>
input_x_vec
,
input_y_vec
,
input_dout_vec
;
paddle
::
framework
::
TensorToVector
(
input_x
,
dev_ctx
,
&
input_x_vec
);
paddle
::
framework
::
TensorToVector
(
input_y
,
dev_ctx
,
&
input_y_vec
);
paddle
::
framework
::
TensorToVector
(
input_out_grad
,
dev_ctx
,
&
input_dout_vec
);
std
::
vector
<
T
>
out_dx_vec
(
output_x_grad
->
numel
());
std
::
vector
<
T
>
out_dy_vec
(
output_y_grad
->
numel
());
dev_ctx
.
template
Alloc
<
T
>(
output_x_grad
);
dev_ctx
.
template
Alloc
<
T
>(
output_y_grad
);
for
(
auto
i
=
0
;
i
<
outer_loops
;
i
++
)
{
for
(
auto
j
=
0
;
j
<
3
;
j
++
)
{
auto
dst_pos
=
(
3
*
i
+
j
)
*
slice_size
;
auto
in_pos1
=
(
3
*
i
+
((
j
+
1
)
%
3
))
*
slice_size
;
auto
in_pos2
=
(
3
*
i
+
((
j
+
2
)
%
3
))
*
slice_size
;
for
(
auto
k
=
0
;
k
<
slice_size
;
k
++
)
{
out_dx_vec
[
dst_pos
+
k
]
=
input_dout_vec
[
in_pos2
+
k
]
*
input_y_vec
[
in_pos1
+
k
]
-
input_dout_vec
[
in_pos1
+
k
]
*
input_y_vec
[
in_pos2
+
k
];
out_dy_vec
[
dst_pos
+
k
]
=
input_dout_vec
[
in_pos1
+
k
]
*
input_x_vec
[
in_pos2
+
k
]
-
input_dout_vec
[
in_pos2
+
k
]
*
input_x_vec
[
in_pos1
+
k
];
}
}
}
paddle
::
framework
::
TensorFromVector
(
out_dx_vec
,
dev_ctx
,
output_x_grad
);
paddle
::
framework
::
TensorFromVector
(
out_dy_vec
,
dev_ctx
,
output_y_grad
);
output_x_grad
->
Resize
(
input_x_dims
);
output_y_grad
->
Resize
(
input_x_dims
);
}
}
// namespace phi
PD_REGISTER_KERNEL
(
cross_grad
,
PD_REGISTER_KERNEL
(
cross_grad
,
CPU
,
CPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
...
...
paddle/phi/kernels/cpu/cross_kernel.cc
浏览文件 @
ec3e0a13
...
@@ -14,9 +14,97 @@
...
@@ -14,9 +14,97 @@
#include "paddle/phi/kernels/cross_kernel.h"
#include "paddle/phi/kernels/cross_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/cross_kernel_impl.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
CrossKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
*
output
=
out
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
phi
::
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
phi
::
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
phi
::
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' so that "
"Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
auto
outer_loops
=
1
;
for
(
auto
i
=
0
;
i
<
dim
;
i
++
)
{
outer_loops
*=
input_x_dims
[
i
];
}
auto
slice_size
=
1
;
for
(
auto
i
=
dim
+
1
;
i
<
input_x_dims
.
size
();
i
++
)
{
slice_size
*=
input_x_dims
[
i
];
}
std
::
vector
<
T
>
input_x_vec
,
input_y_vec
;
paddle
::
framework
::
TensorToVector
(
input_x
,
dev_ctx
,
&
input_x_vec
);
paddle
::
framework
::
TensorToVector
(
input_y
,
dev_ctx
,
&
input_y_vec
);
std
::
vector
<
T
>
out_vec
(
output
->
numel
());
dev_ctx
.
template
Alloc
<
T
>(
output
);
for
(
auto
i
=
0
;
i
<
outer_loops
;
i
++
)
{
for
(
auto
j
=
0
;
j
<
3
;
j
++
)
{
auto
dst_pos
=
(
3
*
i
+
j
)
*
slice_size
;
auto
in_pos1
=
(
3
*
i
+
((
j
+
1
)
%
3
))
*
slice_size
;
auto
in_pos2
=
(
3
*
i
+
((
j
+
2
)
%
3
))
*
slice_size
;
for
(
auto
k
=
0
;
k
<
slice_size
;
k
++
)
{
out_vec
[
dst_pos
+
k
]
=
input_x_vec
[
in_pos1
+
k
]
*
input_y_vec
[
in_pos2
+
k
]
-
input_x_vec
[
in_pos2
+
k
]
*
input_y_vec
[
in_pos1
+
k
];
}
}
}
paddle
::
framework
::
TensorFromVector
(
out_vec
,
dev_ctx
,
output
);
output
->
Resize
(
input_x_dims
);
}
}
// namespace phi
PD_REGISTER_KERNEL
(
PD_REGISTER_KERNEL
(
cross
,
CPU
,
ALL_LAYOUT
,
phi
::
CrossKernel
,
float
,
double
,
int
,
int64_t
)
{}
cross
,
CPU
,
ALL_LAYOUT
,
phi
::
CrossKernel
,
float
,
double
,
int
,
int64_t
)
{}
paddle/phi/kernels/gpu/cross_grad_kernel.cu
浏览文件 @
ec3e0a13
...
@@ -13,9 +13,141 @@
...
@@ -13,9 +13,141 @@
// limitations under the License.
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cross_grad_kernel.h"
#include "paddle/phi/kernels/cross_grad_kernel.h"
#include "paddle/phi/kernels/impl/cross_grad_kernel_impl.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
namespace
phi
{
using
funcs
::
IndexCalculator
;
template
<
typename
T
>
__global__
void
CrossGrad
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
T
*
out_dx
,
T
*
out_dy
,
const
int
stride
,
const
int
N
,
IndexCalculator
index_calculator
)
{
CUDA_KERNEL_LOOP
(
i
,
N
)
{
int
offset
=
index_calculator
(
i
);
auto
pos0
=
offset
+
0
*
stride
;
auto
pos1
=
offset
+
1
*
stride
;
auto
pos2
=
offset
+
2
*
stride
;
out_dx
[
pos0
]
=
out
[
pos2
]
*
y
[
pos1
]
-
out
[
pos1
]
*
y
[
pos2
];
out_dy
[
pos0
]
=
out
[
pos1
]
*
x
[
pos2
]
-
out
[
pos2
]
*
x
[
pos1
];
out_dx
[
pos1
]
=
out
[
pos0
]
*
y
[
pos2
]
-
out
[
pos2
]
*
y
[
pos0
];
out_dy
[
pos1
]
=
out
[
pos2
]
*
x
[
pos0
]
-
out
[
pos0
]
*
x
[
pos2
];
out_dx
[
pos2
]
=
out
[
pos1
]
*
y
[
pos0
]
-
out
[
pos0
]
*
y
[
pos1
];
out_dy
[
pos2
]
=
out
[
pos0
]
*
x
[
pos1
]
-
out
[
pos1
]
*
x
[
pos0
];
}
}
template
<
typename
T
,
typename
Context
>
void
CrossGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
out_grad
,
int
axis
,
DenseTensor
*
x_grad
,
DenseTensor
*
y_grad
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
&
input_out_grad
=
out_grad
;
auto
*
output_x_grad
=
x_grad
;
auto
*
output_y_grad
=
y_grad
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' "
"so that Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
std
::
vector
<
int
>
cal_dims
;
std
::
vector
<
int
>
left_strides
;
std
::
vector
<
int
>
full_strides
;
int
full_dim
=
1
;
int
left_dim
=
1
;
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
full_strides
.
insert
(
full_strides
.
begin
(),
full_dim
);
full_dim
*=
input_x_dims
[
input_x_dims
.
size
()
-
i
-
1
];
if
(
i
==
dim
)
{
continue
;
}
cal_dims
.
push_back
(
i
);
left_strides
.
insert
(
left_strides
.
begin
(),
left_dim
);
left_dim
*=
input_x_dims
[
input_x_dims
.
size
()
-
i
-
1
];
}
const
auto
*
input_x_data
=
input_x
.
data
<
T
>
();
const
auto
*
input_y_data
=
input_y
.
data
<
T
>
();
const
auto
*
input_out_grad_data
=
input_out_grad
.
data
<
T
>
();
auto
*
output_x_grad_data
=
dev_ctx
.
template
Alloc
<
T
>(
x_grad
);
auto
*
output_y_grad_data
=
dev_ctx
.
template
Alloc
<
T
>(
y_grad
);
auto
index_calculator
=
IndexCalculator
(
input_x_dims
.
size
()
-
1
,
cal_dims
,
left_strides
,
full_strides
);
int64_t
numel
=
x
.
numel
();
backends
::
gpu
::
GpuLaunchConfig
config
=
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
numel
/
3
);
CrossGrad
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
input_x_data
,
input_y_data
,
input_out_grad_data
,
output_x_grad_data
,
output_y_grad_data
,
full_strides
[
dim
],
numel
/
3
,
index_calculator
);
}
}
// namespace phi
PD_REGISTER_KERNEL
(
cross_grad
,
PD_REGISTER_KERNEL
(
cross_grad
,
GPU
,
GPU
,
...
...
paddle/phi/kernels/gpu/cross_kernel.cu
浏览文件 @
ec3e0a13
...
@@ -13,9 +13,126 @@
...
@@ -13,9 +13,126 @@
// limitations under the License.
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cross_kernel.h"
#include "paddle/phi/kernels/cross_kernel.h"
#include "paddle/phi/kernels/impl/cross_kernel_impl.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
namespace
phi
{
using
funcs
::
IndexCalculator
;
template
<
typename
T
>
__global__
void
Cross
(
const
T
*
x
,
const
T
*
y
,
T
*
out
,
const
int
stride
,
const
int
N
,
IndexCalculator
index_calculator
)
{
CUDA_KERNEL_LOOP
(
i
,
N
)
{
int
offset
=
index_calculator
(
i
);
auto
pos0
=
offset
+
0
*
stride
;
auto
pos1
=
offset
+
1
*
stride
;
auto
pos2
=
offset
+
2
*
stride
;
out
[
pos0
]
=
x
[
pos1
]
*
y
[
pos2
]
-
x
[
pos2
]
*
y
[
pos1
];
out
[
pos1
]
=
x
[
pos2
]
*
y
[
pos0
]
-
x
[
pos0
]
*
y
[
pos2
];
out
[
pos2
]
=
x
[
pos0
]
*
y
[
pos1
]
-
x
[
pos1
]
*
y
[
pos0
];
}
}
template
<
typename
T
,
typename
Context
>
void
CrossKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
*
output
=
out
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
phi
::
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
phi
::
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
phi
::
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' so that "
"Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
std
::
vector
<
int
>
cal_dims
;
std
::
vector
<
int
>
left_strides
;
std
::
vector
<
int
>
full_strides
;
int
dims0
=
1
;
int
dims1
=
1
;
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
full_strides
.
insert
(
full_strides
.
begin
(),
dims0
);
dims0
*=
input_x_dims
[
input_x_dims
.
size
()
-
i
-
1
];
if
(
i
==
dim
)
{
continue
;
}
cal_dims
.
push_back
(
i
);
left_strides
.
insert
(
left_strides
.
begin
(),
dims1
);
dims1
*=
input_x_dims
[
input_x_dims
.
size
()
-
i
-
1
];
}
const
auto
*
input_x_data
=
input_x
.
data
<
T
>
();
const
auto
*
input_y_data
=
input_y
.
data
<
T
>
();
auto
*
out_data
=
dev_ctx
.
template
Alloc
<
T
>(
out
);
auto
index_calculator
=
IndexCalculator
(
input_x_dims
.
size
()
-
1
,
cal_dims
,
left_strides
,
full_strides
);
int64_t
numel
=
x
.
numel
();
backends
::
gpu
::
GpuLaunchConfig
config
=
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
numel
/
3
);
Cross
<<<
config
.
block_per_grid
,
config
.
thread_per_block
,
0
,
dev_ctx
.
stream
()
>>>
(
input_x_data
,
input_y_data
,
out_data
,
full_strides
[
dim
],
numel
/
3
,
index_calculator
);
}
}
// namespace phi
PD_REGISTER_KERNEL
(
PD_REGISTER_KERNEL
(
cross
,
GPU
,
ALL_LAYOUT
,
phi
::
CrossKernel
,
float
,
double
,
int
,
int64_t
)
{}
cross
,
GPU
,
ALL_LAYOUT
,
phi
::
CrossKernel
,
float
,
double
,
int
,
int64_t
)
{}
paddle/phi/kernels/impl/cross_grad_kernel_impl.h
已删除
100644 → 0
浏览文件 @
abbd4abb
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
CrossGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
out_grad
,
int
axis
,
DenseTensor
*
x_grad
,
DenseTensor
*
y_grad
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
&
input_out_grad
=
out_grad
;
auto
*
output_x_grad
=
x_grad
;
auto
*
output_y_grad
=
y_grad
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' "
"so that Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
auto
outer_loops
=
1
;
for
(
auto
i
=
0
;
i
<
dim
;
i
++
)
{
outer_loops
*=
input_x_dims
[
i
];
}
auto
slice_size
=
1
;
for
(
auto
i
=
dim
+
1
;
i
<
input_x_dims
.
size
();
i
++
)
{
slice_size
*=
input_x_dims
[
i
];
}
std
::
vector
<
T
>
input_x_vec
,
input_y_vec
,
input_dout_vec
;
paddle
::
framework
::
TensorToVector
(
input_x
,
dev_ctx
,
&
input_x_vec
);
paddle
::
framework
::
TensorToVector
(
input_y
,
dev_ctx
,
&
input_y_vec
);
paddle
::
framework
::
TensorToVector
(
input_out_grad
,
dev_ctx
,
&
input_dout_vec
);
std
::
vector
<
T
>
out_dx_vec
(
output_x_grad
->
numel
());
std
::
vector
<
T
>
out_dy_vec
(
output_y_grad
->
numel
());
dev_ctx
.
template
Alloc
<
T
>(
output_x_grad
);
dev_ctx
.
template
Alloc
<
T
>(
output_y_grad
);
for
(
auto
i
=
0
;
i
<
outer_loops
;
i
++
)
{
for
(
auto
j
=
0
;
j
<
3
;
j
++
)
{
auto
dst_pos
=
(
3
*
i
+
j
)
*
slice_size
;
auto
in_pos1
=
(
3
*
i
+
((
j
+
1
)
%
3
))
*
slice_size
;
auto
in_pos2
=
(
3
*
i
+
((
j
+
2
)
%
3
))
*
slice_size
;
for
(
auto
k
=
0
;
k
<
slice_size
;
k
++
)
{
out_dx_vec
[
dst_pos
+
k
]
=
input_dout_vec
[
in_pos2
+
k
]
*
input_y_vec
[
in_pos1
+
k
]
-
input_dout_vec
[
in_pos1
+
k
]
*
input_y_vec
[
in_pos2
+
k
];
out_dy_vec
[
dst_pos
+
k
]
=
input_dout_vec
[
in_pos1
+
k
]
*
input_x_vec
[
in_pos2
+
k
]
-
input_dout_vec
[
in_pos2
+
k
]
*
input_x_vec
[
in_pos1
+
k
];
}
}
}
paddle
::
framework
::
TensorFromVector
(
out_dx_vec
,
dev_ctx
,
output_x_grad
);
paddle
::
framework
::
TensorFromVector
(
out_dy_vec
,
dev_ctx
,
output_y_grad
);
output_x_grad
->
Resize
(
input_x_dims
);
output_y_grad
->
Resize
(
input_x_dims
);
}
}
// namespace phi
paddle/phi/kernels/impl/cross_kernel_impl.h
已删除
100644 → 0
浏览文件 @
abbd4abb
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
namespace
phi
{
template
<
typename
T
,
typename
Context
>
void
CrossKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
)
{
auto
&
input_x
=
x
;
auto
&
input_y
=
y
;
auto
*
output
=
out
;
int
dim
=
axis
;
auto
input_x_dims
=
input_x
.
dims
();
auto
input_y_dims
=
input_y
.
dims
();
bool
dims_match
=
phi
::
funcs
::
CheckDims
(
input_x_dims
,
input_y_dims
);
PADDLE_ENFORCE_EQ
(
dims_match
,
true
,
phi
::
errors
::
InvalidArgument
(
"The 'shape' of Input(X) should be equal to "
"the 'shape' of Input(Y). But received "
"Input(X).dimensions = [%s], "
"Input(Y).dimensions = [%s]"
,
input_x_dims
,
input_x_dims
));
if
(
dim
!=
DDim
::
kMaxRank
)
{
PADDLE_ENFORCE_EQ
(
dim
<
input_x_dims
.
size
()
&&
dim
>=
(
0
-
input_x_dims
.
size
()),
true
,
phi
::
errors
::
OutOfRange
(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d."
,
input_x_dims
.
size
(),
input_x_dims
.
size
()
-
1
,
dim
));
if
(
dim
<
0
)
{
dim
+=
input_x_dims
.
size
();
}
PADDLE_ENFORCE_EQ
(
input_x_dims
[
dim
]
==
3
,
true
,
phi
::
errors
::
InvalidArgument
(
"Input(X/Y).dims[dim] must be equal to 3. But received: "
"Input(X/Y).dims[dim] = [%d]."
,
input_x_dims
[
dim
]));
}
else
{
for
(
auto
i
=
0
;
i
<
input_x_dims
.
size
();
i
++
)
{
if
(
input_x_dims
[
i
]
==
3
)
{
dim
=
i
;
break
;
}
}
PADDLE_ENFORCE_EQ
(
dim
==
DDim
::
kMaxRank
,
false
,
phi
::
errors
::
InvalidArgument
(
"There must be at least one dimension 'd' so that "
"Input(X/Y).dims()[d] is equal to 3. "
"But received: Input(X/Y).dims() == [%s]."
,
input_x_dims
));
}
auto
outer_loops
=
1
;
for
(
auto
i
=
0
;
i
<
dim
;
i
++
)
{
outer_loops
*=
input_x_dims
[
i
];
}
auto
slice_size
=
1
;
for
(
auto
i
=
dim
+
1
;
i
<
input_x_dims
.
size
();
i
++
)
{
slice_size
*=
input_x_dims
[
i
];
}
std
::
vector
<
T
>
input_x_vec
,
input_y_vec
;
paddle
::
framework
::
TensorToVector
(
input_x
,
dev_ctx
,
&
input_x_vec
);
paddle
::
framework
::
TensorToVector
(
input_y
,
dev_ctx
,
&
input_y_vec
);
std
::
vector
<
T
>
out_vec
(
output
->
numel
());
dev_ctx
.
template
Alloc
<
T
>(
output
);
for
(
auto
i
=
0
;
i
<
outer_loops
;
i
++
)
{
for
(
auto
j
=
0
;
j
<
3
;
j
++
)
{
auto
dst_pos
=
(
3
*
i
+
j
)
*
slice_size
;
auto
in_pos1
=
(
3
*
i
+
((
j
+
1
)
%
3
))
*
slice_size
;
auto
in_pos2
=
(
3
*
i
+
((
j
+
2
)
%
3
))
*
slice_size
;
for
(
auto
k
=
0
;
k
<
slice_size
;
k
++
)
{
out_vec
[
dst_pos
+
k
]
=
input_x_vec
[
in_pos1
+
k
]
*
input_y_vec
[
in_pos2
+
k
]
-
input_x_vec
[
in_pos2
+
k
]
*
input_y_vec
[
in_pos1
+
k
];
}
}
}
paddle
::
framework
::
TensorFromVector
(
out_vec
,
dev_ctx
,
output
);
output
->
Resize
(
input_x_dims
);
}
}
// namespace phi
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录