Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ab866777
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ab866777
编写于
2月 15, 2022
作者:
C
Chen Weihang
提交者:
GitHub
2月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[PTen] Polish trace moving (#39510)
* polish trace moving * remove useless header
上级
4745234f
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
162 addition
and
132 deletion
+162
-132
paddle/pten/kernels/cpu/trace_grad_kernel.cc
paddle/pten/kernels/cpu/trace_grad_kernel.cc
+4
-4
paddle/pten/kernels/cpu/trace_kernel.cc
paddle/pten/kernels/cpu/trace_kernel.cc
+14
-14
paddle/pten/kernels/funcs/diagonal.h
paddle/pten/kernels/funcs/diagonal.h
+130
-0
paddle/pten/kernels/gpu/trace_grad_kernel.cu
paddle/pten/kernels/gpu/trace_grad_kernel.cu
+5
-5
paddle/pten/kernels/gpu/trace_kernel.cu
paddle/pten/kernels/gpu/trace_kernel.cu
+8
-7
paddle/pten/kernels/impl/trace_grad_kernel_impl.h
paddle/pten/kernels/impl/trace_grad_kernel_impl.h
+1
-102
未找到文件。
paddle/pten/kernels/cpu/trace_grad_kernel.cc
浏览文件 @
ab866777
...
...
@@ -13,10 +13,10 @@
// limitations under the License.
#include "paddle/pten/kernels/trace_grad_kernel.h"
#include "paddle/pten/kernels/impl/trace_kernel_impl.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/trace_grad_kernel_impl.h"
PT_REGISTER_KERNEL
(
trace_grad
,
CPU
,
...
...
@@ -26,6 +26,6 @@ PT_REGISTER_KERNEL(trace_grad,
double
,
int
,
int64_t
,
p
addle
::
platform
::
float16
,
p
addle
::
platform
::
complex
<
float
>
,
p
addle
::
platform
::
complex
<
double
>
)
{}
p
ten
::
dtype
::
float16
,
p
ten
::
dtype
::
complex
<
float
>
,
p
ten
::
dtype
::
complex
<
double
>
)
{}
paddle/pten/kernels/cpu/trace_kernel.cc
浏览文件 @
ab866777
...
...
@@ -13,31 +13,31 @@
// limitations under the License.
#include "paddle/pten/kernels/trace_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/trace_kernel_impl.h"
#include "paddle/pten/kernels/funcs/diagonal.h"
#include "paddle/pten/kernels/funcs/eigen/common.h"
namespace
pten
{
template
<
typename
T
,
typename
Context
>
void
TraceKernel
(
const
Context
&
ctx
,
void
TraceKernel
(
const
Context
&
dev_
ctx
,
const
DenseTensor
&
x
,
int
offset
,
int
axis1
,
int
axis2
,
DenseTensor
*
out
)
{
auto
output_dims
=
out
->
dims
();
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
out_data
=
dev_ctx
.
template
Alloc
<
T
>(
out
);
const
DenseTensor
diag
=
Diagonal
<
T
,
Context
>
(
ctx
,
&
x
,
offset
,
axis1
,
axis2
);
const
DenseTensor
diag
=
funcs
::
Diagonal
<
T
,
Context
>
(
dev_ctx
,
&
x
,
offset
,
axis1
,
axis2
);
if
(
diag
.
numel
()
>
0
)
{
auto
x
=
paddle
::
framework
::
EigenMatrix
<
T
>::
Reshape
(
diag
,
diag
.
dims
().
size
()
-
1
);
auto
output
=
paddle
::
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
x
=
pten
::
EigenMatrix
<
T
>::
Reshape
(
diag
,
diag
.
dims
().
size
()
-
1
);
auto
output
=
pten
::
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
reduce_dim
=
Eigen
::
array
<
int
,
1
>
({
1
});
output
.
device
(
*
ctx
.
eigen_device
())
=
x
.
sum
(
reduce_dim
);
out
->
Resize
(
out
put_dims
);
output
.
device
(
*
dev_
ctx
.
eigen_device
())
=
x
.
sum
(
reduce_dim
);
out
->
Resize
(
out
->
dims
()
);
}
else
{
std
::
fill
(
out_data
,
out_data
+
out
->
numel
(),
static_cast
<
T
>
(
0
));
}
...
...
@@ -53,6 +53,6 @@ PT_REGISTER_KERNEL(trace,
double
,
int
,
int64_t
,
p
addle
::
platform
::
float16
,
p
addle
::
platform
::
complex
<
float
>
,
p
addle
::
platform
::
complex
<
double
>
)
{}
p
ten
::
dtype
::
float16
,
p
ten
::
dtype
::
complex
<
float
>
,
p
ten
::
dtype
::
complex
<
double
>
)
{}
paddle/pten/kernels/funcs/diagonal.h
0 → 100644
浏览文件 @
ab866777
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#endif
#include <algorithm>
#include "paddle/fluid/platform/for_range.h"
namespace
pten
{
namespace
funcs
{
/**
 * Per-element copy functor used to materialise a diagonal.
 *
 * For a flat output index `idx`, the call operator walks `dim_size` axes:
 * on each axis it divides the running index by `diag_stride[axis]`,
 * multiplies that quotient by `ret_strides[axis]`, and adds the product to a
 * source offset that starts at `pos`. The remainder of the division is
 * carried to the next axis. Finally `input[offset]` is written to
 * `diag[idx]`.
 *
 * The functor only stores the raw pointers it is given; it does not own or
 * free them.
 */
template <typename T>
struct DiagonalFunctor {
  DiagonalFunctor(const T* input,
                  const int64_t* diag_stride,
                  const int64_t* ret_strides,
                  int64_t pos,
                  int64_t dim_size,
                  T* diag)
      : input_(input),
        diag_stride_(diag_stride),
        ret_strides_(ret_strides),
        pos_(pos),
        dim_size_(dim_size),
        diag_(diag) {}

  /// Copies the source element that corresponds to output element `idx`.
  HOSTDEVICE void operator()(size_t idx) const {
    int64_t remaining = idx;     // part of idx not yet decomposed
    int64_t src_offset = pos_;   // running offset into input_
    int64_t axis = 0;
    while (axis < dim_size_) {
      const int64_t step = diag_stride_[axis];
      src_offset += remaining / step * ret_strides_[axis];
      remaining %= step;
      ++axis;
    }
    diag_[idx] = input_[src_offset];
  }

  const T* input_;              // source buffer
  const int64_t* diag_stride_;  // divisors used to decompose the output index
  const int64_t* ret_strides_;  // per-axis multipliers into the source buffer
  int64_t pos_;                 // starting offset into the source buffer
  int64_t dim_size_;            // number of entries read from both stride arrays
  T* diag_;                     // destination buffer
};
/**
 * Extracts the diagonal of `input` taken across axes `dim1` and `dim2` into
 * a freshly allocated DenseTensor.
 *
 * @param context  Device context; supplies the allocation place and is
 *                 handed to ForRange to run the copy functor.
 * @param input    Source tensor (must not be null; it is dereferenced).
 * @param offset   Diagonal offset. A non-negative value shortens the extent
 *                 of the larger-numbered axis; a negative value shortens the
 *                 extent of the smaller-numbered axis.
 * @param dim1     First axis; a negative value has the rank added to it.
 * @param dim2     Second axis; a negative value has the rank added to it.
 * @return A tensor whose dims are the input dims with the two axes erased
 *         and the diagonal length appended (a leading 1 is inserted when no
 *         other axis remains). Returns a default-constructed DenseTensor
 *         when the diagonal length is not positive.
 *
 * NOTE(review): dim1 and dim2 are assumed to resolve to two distinct, valid
 * axes; nothing here checks that -- confirm callers validate it.
 */
template <typename T, typename DeviceContext>
DenseTensor Diagonal(const DeviceContext& context,
                     const DenseTensor* input,
                     int64_t offset,
                     int64_t dim1,
                     int64_t dim2) {
  auto* input_data = input->data<T>();
  auto input_dims = input->dims();
  auto input_stride = framework::stride(input_dims);

  // Negative axes are shifted by the rank.
  auto dim1_ = dim1 < 0 ? input_dims.size() + dim1 : dim1;
  auto dim2_ = dim2 < 0 ? input_dims.size() + dim2 : dim2;
  const auto lo_axis = std::min(dim1_, dim2_);
  const auto hi_axis = std::max(dim1_, dim2_);

  auto len1 = input_dims[lo_axis];
  auto len2 = input_dims[hi_axis];
  auto stride1 = input_stride[lo_axis];
  auto stride2 = input_stride[hi_axis];

  // Strides and extents are 64-bit; keeping these as int64_t (rather than
  // int) avoids silent truncation for very large tensors.
  int64_t offset_stride = 0;
  if (offset >= 0) {
    offset_stride = stride2;
    len2 -= offset;
  } else {
    offset_stride = stride1;
    len1 += offset;
  }
  const int64_t diag_size = len2 < len1 ? len2 : len1;

  if (diag_size <= 0) {
    return {};
  }

  auto ret_strides = vectorize(input_stride);
  auto ret_dims = vectorize(input_dims);
  // Erase the larger index first so the smaller index stays valid.
  ret_strides.erase(ret_strides.begin() + hi_axis);
  ret_strides.erase(ret_strides.begin() + lo_axis);
  ret_dims.erase(ret_dims.begin() + hi_axis);
  ret_dims.erase(ret_dims.begin() + lo_axis);
  if (ret_strides.empty()) {
    ret_strides.push_back(1);
    ret_dims.push_back(1);
  }
  // Moving one step along the diagonal advances both axes at once.
  ret_strides.push_back(stride1 + stride2);
  ret_dims.push_back(diag_size);

  DenseTensor diag;
  framework::DDim diag_dims = framework::make_ddim(ret_dims);
  auto diag_stride = framework::stride(diag_dims);
  auto diag_data = diag.mutable_data<T>(diag_dims, context.GetPlace());

  // Offset of the first diagonal element inside the input buffer.
  int64_t pos = std::abs(offset) * offset_stride;
  int64_t dim_size = ret_strides.size();

#if defined(__NVCC__) || defined(__HIPCC__)
  // Under NVCC/HIPCC the stride tables are copied into thrust device
  // vectors and the functor receives raw pointers into them.
  thrust::device_vector<int64_t> diag_vec(vectorize(diag_stride));
  const int64_t* diag_arr = thrust::raw_pointer_cast(diag_vec.data());
  thrust::device_vector<int64_t> ret_vec(ret_strides);
  const int64_t* ret_arr = thrust::raw_pointer_cast(ret_vec.data());
#else
  auto* diag_arr = diag_stride.Get();
  const auto* ret_arr = ret_strides.data();
#endif

  paddle::platform::ForRange<DeviceContext> for_range(context, diag.numel());
  DiagonalFunctor<T> functor(
      input_data, diag_arr, ret_arr, pos, dim_size, diag_data);
  for_range(functor);
  return diag;
}
}
// namespace funcs
}
// namespace pten
paddle/pten/kernels/gpu/trace_grad_kernel.cu
浏览文件 @
ab866777
...
...
@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/impl/trace_kernel_impl.h"
#include "paddle/pten/kernels/trace_grad_kernel.h"
#include "paddle/pten/backends/
cpu/c
pu_context.h"
#include "paddle/pten/backends/
gpu/g
pu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/trace_grad_kernel_impl.h"
PT_REGISTER_KERNEL
(
trace_grad
,
GPU
,
...
...
@@ -26,6 +26,6 @@ PT_REGISTER_KERNEL(trace_grad,
double
,
int
,
int64_t
,
p
addle
::
platform
::
float16
,
p
addle
::
platform
::
complex
<
float
>
,
p
addle
::
platform
::
complex
<
double
>
)
{}
p
ten
::
dtype
::
float16
,
p
ten
::
dtype
::
complex
<
float
>
,
p
ten
::
dtype
::
complex
<
double
>
)
{}
paddle/pten/kernels/gpu/trace_kernel.cu
浏览文件 @
ab866777
...
...
@@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/trace_kernel.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/funcs/diagonal.h"
#include "paddle/pten/kernels/gpu/reduce.h"
#include "paddle/pten/kernels/impl/trace_kernel_impl.h"
#include "paddle/pten/kernels/trace_kernel.h"
namespace
pten
{
...
...
@@ -27,8 +28,8 @@ void TraceKernel(const Context& ctx,
int
axis1
,
int
axis2
,
DenseTensor
*
out
)
{
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()
);
auto
diag
=
Diagonal
<
T
,
Context
>
(
ctx
,
&
x
,
offset
,
axis1
,
axis2
);
T
*
out_data
=
ctx
.
template
Alloc
<
T
>(
out
);
auto
diag
=
funcs
::
Diagonal
<
T
,
Context
>
(
ctx
,
&
x
,
offset
,
axis1
,
axis2
);
if
(
diag
.
numel
()
>
0
)
{
auto
stream
=
ctx
.
stream
();
std
::
vector
<
int
>
reduce_dims
;
...
...
@@ -51,6 +52,6 @@ PT_REGISTER_KERNEL(trace,
double
,
int
,
int64_t
,
p
addle
::
platform
::
float16
,
p
addle
::
platform
::
complex
<
float
>
,
p
addle
::
platform
::
complex
<
double
>
)
{}
p
ten
::
dtype
::
float16
,
p
ten
::
dtype
::
complex
<
float
>
,
p
ten
::
dtype
::
complex
<
double
>
)
{}
paddle/pten/kernels/impl/trace_kernel_impl.h
→
paddle/pten/kernels/impl/trace_
grad_
kernel_impl.h
浏览文件 @
ab866777
...
...
@@ -21,44 +21,10 @@
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/pten/kernels/funcs/math_function.h"
namespace
pten
{
template
<
typename
T
>
struct
DiagonalFunctor
{
DiagonalFunctor
(
const
T
*
input
,
const
int64_t
*
diag_stride
,
const
int64_t
*
ret_strides
,
int64_t
pos
,
int64_t
dim_size
,
T
*
diag
)
:
input_
(
input
),
diag_stride_
(
diag_stride
),
ret_strides_
(
ret_strides
),
pos_
(
pos
),
dim_size_
(
dim_size
),
diag_
(
diag
)
{}
HOSTDEVICE
void
operator
()(
size_t
idx
)
const
{
int64_t
position
=
pos_
;
int64_t
num
=
idx
;
for
(
int64_t
i
=
0
;
i
<
dim_size_
;
i
++
)
{
position
+=
num
/
diag_stride_
[
i
]
*
ret_strides_
[
i
];
num
=
num
%
diag_stride_
[
i
];
}
diag_
[
idx
]
=
input_
[
position
];
}
const
T
*
input_
;
const
int64_t
*
diag_stride_
;
const
int64_t
*
ret_strides_
;
int64_t
pos_
;
int64_t
dim_size_
;
T
*
diag_
;
};
template
<
typename
T
>
struct
TraceGradFunctor
{
...
...
@@ -114,73 +80,6 @@ struct TraceGradFunctor {
T
*
d_x_
;
};
template
<
typename
T
,
typename
DeviceContext
>
DenseTensor
Diagonal
(
const
DeviceContext
&
context
,
const
DenseTensor
*
input
,
int64_t
offset
,
int64_t
dim1
,
int64_t
dim2
)
{
auto
*
input_data
=
input
->
data
<
T
>
();
auto
input_dims
=
input
->
dims
();
auto
input_stride
=
framework
::
stride
(
input_dims
);
auto
dim1_
=
dim1
<
0
?
input_dims
.
size
()
+
dim1
:
dim1
;
auto
dim2_
=
dim2
<
0
?
input_dims
.
size
()
+
dim2
:
dim2
;
auto
len1
=
input_dims
[
std
::
min
(
dim1_
,
dim2_
)];
auto
len2
=
input_dims
[
std
::
max
(
dim1_
,
dim2_
)];
auto
stride1
=
input_stride
[
std
::
min
(
dim1_
,
dim2_
)];
auto
stride2
=
input_stride
[
std
::
max
(
dim1_
,
dim2_
)];
int
offset_stride
=
0
;
if
(
offset
>=
0
)
{
offset_stride
=
stride2
;
len2
-=
offset
;
}
else
{
offset_stride
=
stride1
;
len1
+=
offset
;
}
int
diag_size
=
len2
<
len1
?
len2
:
len1
;
if
(
diag_size
>
0
)
{
auto
ret_strides
=
vectorize
(
input_stride
);
auto
ret_dims
=
vectorize
(
input_dims
);
ret_strides
.
erase
(
ret_strides
.
begin
()
+
std
::
max
(
dim1_
,
dim2_
));
ret_strides
.
erase
(
ret_strides
.
begin
()
+
std
::
min
(
dim1_
,
dim2_
));
ret_dims
.
erase
(
ret_dims
.
begin
()
+
std
::
max
(
dim1_
,
dim2_
));
ret_dims
.
erase
(
ret_dims
.
begin
()
+
std
::
min
(
dim1_
,
dim2_
));
if
(
ret_strides
.
empty
())
{
ret_strides
.
push_back
(
1
);
ret_dims
.
push_back
(
1
);
}
ret_strides
.
push_back
(
stride1
+
stride2
);
ret_dims
.
push_back
(
diag_size
);
DenseTensor
diag
;
framework
::
DDim
diag_dims
=
framework
::
make_ddim
(
ret_dims
);
auto
dig_stride
=
framework
::
stride
(
diag_dims
);
auto
diag_data
=
diag
.
mutable_data
<
T
>
(
diag_dims
,
context
.
GetPlace
());
int64_t
pos
=
std
::
abs
(
offset
)
*
offset_stride
;
int64_t
dim_size
=
ret_strides
.
size
();
#if defined(__NVCC__) || defined(__HIPCC__)
thrust
::
device_vector
<
int64_t
>
diag_vec
(
vectorize
(
dig_stride
));
const
int64_t
*
diag_arr
=
thrust
::
raw_pointer_cast
(
diag_vec
.
data
());
thrust
::
device_vector
<
int64_t
>
ret_vec
(
ret_strides
);
const
int64_t
*
ret_arr
=
thrust
::
raw_pointer_cast
(
ret_vec
.
data
());
#else
auto
*
diag_arr
=
dig_stride
.
Get
();
const
auto
*
ret_arr
=
ret_strides
.
data
();
#endif
// auto& dev_ctx = context.template device_context<DeviceContext>();
paddle
::
platform
::
ForRange
<
DeviceContext
>
for_range
(
context
,
diag
.
numel
());
DiagonalFunctor
<
T
>
functor
(
input_data
,
diag_arr
,
ret_arr
,
pos
,
dim_size
,
diag_data
);
for_range
(
functor
);
return
diag
;
}
else
{
return
{};
}
}
template
<
typename
T
,
typename
Context
>
void
TraceGradKernel
(
const
Context
&
ctx
,
const
DenseTensor
&
out_grad
,
...
...
@@ -195,7 +94,7 @@ void TraceGradKernel(const Context& ctx,
auto
output_stride
=
framework
::
stride
(
output_dims
);
auto
*
out_data
=
out_grad
.
data
<
T
>
();
T
*
x_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()
);
T
*
x_data
=
ctx
.
template
Alloc
<
T
>(
in_grad
);
pten
::
funcs
::
SetConstant
<
Context
,
T
>
set_zero
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录