Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
f71241b9
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f71241b9
编写于
2月 08, 2022
作者:
Y
Yiqun Liu
提交者:
GitHub
2月 08, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Rename partial function name TensorReduceFunctorImpl to TensorReduceImpl. (#39388)
上级
e4d475ea
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
29 addition
and
31 deletion
+29
-31
paddle/fluid/operators/broadcast_tensors_op.cu
paddle/fluid/operators/broadcast_tensors_op.cu
+1
-1
paddle/fluid/operators/cholesky_solve_op.cu
paddle/fluid/operators/cholesky_solve_op.cu
+1
-1
paddle/fluid/operators/clip_by_norm_op.cu
paddle/fluid/operators/clip_by_norm_op.cu
+2
-2
paddle/fluid/operators/controlflow/compare_all_op.cu
paddle/fluid/operators/controlflow/compare_all_op.cu
+1
-2
paddle/fluid/operators/elementwise/elementwise_op_function.h
paddle/fluid/operators/elementwise/elementwise_op_function.h
+1
-1
paddle/fluid/operators/fused/attn_gemm.h
paddle/fluid/operators/fused/attn_gemm.h
+1
-1
paddle/fluid/operators/kron_op.h
paddle/fluid/operators/kron_op.h
+2
-2
paddle/fluid/operators/margin_cross_entropy_op.cu
paddle/fluid/operators/margin_cross_entropy_op.cu
+2
-2
paddle/fluid/operators/mean_op.cu
paddle/fluid/operators/mean_op.cu
+1
-1
paddle/fluid/operators/p_norm_op.cu
paddle/fluid/operators/p_norm_op.cu
+4
-4
paddle/fluid/operators/pool_op.h
paddle/fluid/operators/pool_op.h
+1
-2
paddle/fluid/operators/prelu_op.cu
paddle/fluid/operators/prelu_op.cu
+1
-1
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
+5
-5
paddle/fluid/operators/renorm_op.cu
paddle/fluid/operators/renorm_op.cu
+3
-3
paddle/fluid/operators/solve_op.h
paddle/fluid/operators/solve_op.h
+1
-1
paddle/fluid/operators/trace_op.cu
paddle/fluid/operators/trace_op.cu
+1
-1
paddle/fluid/operators/triangular_solve_op.cu
paddle/fluid/operators/triangular_solve_op.cu
+1
-1
未找到文件。
paddle/fluid/operators/broadcast_tensors_op.cu
浏览文件 @
f71241b9
...
@@ -89,7 +89,7 @@ class CUDABroadcastTensorsGradOpKernel : public framework::OpKernel<T> {
...
@@ -89,7 +89,7 @@ class CUDABroadcastTensorsGradOpKernel : public framework::OpKernel<T> {
}
else
{
}
else
{
// reduce_sum implementation on CUDA
// reduce_sum implementation on CUDA
auto
stream
=
context
.
cuda_device_context
().
stream
();
auto
stream
=
context
.
cuda_device_context
().
stream
();
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
context
.
cuda_device_context
(),
*
input_tensor
,
output_tensor
,
context
.
cuda_device_context
(),
*
input_tensor
,
output_tensor
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims_vec
,
stream
);
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims_vec
,
stream
);
}
}
...
...
paddle/fluid/operators/cholesky_solve_op.cu
浏览文件 @
f71241b9
...
@@ -114,7 +114,7 @@ class MatrixReduceSumFunctor<platform::CUDADeviceContext, T> {
...
@@ -114,7 +114,7 @@ class MatrixReduceSumFunctor<platform::CUDADeviceContext, T> {
}
}
}
}
gpuStream_t
stream
=
ctx
.
cuda_device_context
().
stream
();
gpuStream_t
stream
=
ctx
.
cuda_device_context
().
stream
();
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
in
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
in
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
out_reduce_dims
,
stream
);
out_reduce_dims
,
stream
);
}
}
...
...
paddle/fluid/operators/clip_by_norm_op.cu
浏览文件 @
f71241b9
...
@@ -75,8 +75,8 @@ class ClipByNormKernel<platform::CUDADeviceContext, platform::float16>
...
@@ -75,8 +75,8 @@ class ClipByNormKernel<platform::CUDADeviceContext, platform::float16>
}
}
Tensor
tmp
=
context
.
AllocateTmpTensor
<
float
,
platform
::
CUDADeviceContext
>
(
Tensor
tmp
=
context
.
AllocateTmpTensor
<
float
,
platform
::
CUDADeviceContext
>
(
{
1
},
dev_ctx
);
{
1
},
dev_ctx
);
TensorReduce
Functor
Impl
<
platform
::
float16
,
float
,
kps
::
AddFunctor
,
TensorReduceImpl
<
platform
::
float16
,
float
,
kps
::
AddFunctor
,
kps
::
SquareFunctor
<
platform
::
float16
,
float
>>
(
kps
::
SquareFunctor
<
platform
::
float16
,
float
>>
(
dev_ctx
,
*
input
,
&
tmp
,
kps
::
SquareFunctor
<
platform
::
float16
,
float
>
(),
dev_ctx
,
*
input
,
&
tmp
,
kps
::
SquareFunctor
<
platform
::
float16
,
float
>
(),
reduce_dims
,
dev_ctx
.
stream
());
reduce_dims
,
dev_ctx
.
stream
());
auto
tmp_eigen
=
EigenVector
<
float
>::
Flatten
(
tmp
);
auto
tmp_eigen
=
EigenVector
<
float
>::
Flatten
(
tmp
);
...
...
paddle/fluid/operators/controlflow/compare_all_op.cu
浏览文件 @
f71241b9
...
@@ -63,8 +63,7 @@ class CompareReduceOpKernel
...
@@ -63,8 +63,7 @@ class CompareReduceOpKernel
reduce_dims
.
resize
(
tmp
.
dims
().
size
());
reduce_dims
.
resize
(
tmp
.
dims
().
size
());
for
(
int
i
=
0
;
i
<
reduce_dims
.
size
();
++
i
)
reduce_dims
[
i
]
=
i
;
for
(
int
i
=
0
;
i
<
reduce_dims
.
size
();
++
i
)
reduce_dims
[
i
]
=
i
;
auto
stream
=
context
.
cuda_device_context
().
stream
();
auto
stream
=
context
.
cuda_device_context
().
stream
();
TensorReduceFunctorImpl
<
bool
,
bool
,
BitwiseAdd
,
TensorReduceImpl
<
bool
,
bool
,
BitwiseAdd
,
kps
::
IdentityFunctor
<
bool
>>
(
kps
::
IdentityFunctor
<
bool
>>
(
context
.
cuda_device_context
(),
tmp
,
z
,
kps
::
IdentityFunctor
<
bool
>
(),
context
.
cuda_device_context
(),
tmp
,
z
,
kps
::
IdentityFunctor
<
bool
>
(),
reduce_dims
,
stream
);
reduce_dims
,
stream
);
}
}
...
...
paddle/fluid/operators/elementwise/elementwise_op_function.h
浏览文件 @
f71241b9
...
@@ -1188,7 +1188,7 @@ template <typename T>
...
@@ -1188,7 +1188,7 @@ template <typename T>
void
ReduceWrapper
(
const
platform
::
CUDADeviceContext
&
dev_ctx
,
int
axis
,
void
ReduceWrapper
(
const
platform
::
CUDADeviceContext
&
dev_ctx
,
int
axis
,
framework
::
Tensor
*
src
,
framework
::
Tensor
*
dst
)
{
framework
::
Tensor
*
src
,
framework
::
Tensor
*
dst
)
{
std
::
vector
<
int
>
reduce_dims
=
GetReduceDim
(
dst
->
dims
(),
src
->
dims
(),
axis
);
std
::
vector
<
int
>
reduce_dims
=
GetReduceDim
(
dst
->
dims
(),
src
->
dims
(),
axis
);
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx
,
*
src
,
dst
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
dev_ctx
,
*
src
,
dst
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
dev_ctx
.
stream
());
dev_ctx
.
stream
());
}
}
...
...
paddle/fluid/operators/fused/attn_gemm.h
浏览文件 @
f71241b9
...
@@ -165,7 +165,7 @@ class AttnMatMul {
...
@@ -165,7 +165,7 @@ class AttnMatMul {
(
input_dims
[
2
]
==
output_dims
[
0
]));
(
input_dims
[
2
]
==
output_dims
[
0
]));
if
(
support_case_1
||
support_case_2
)
{
if
(
support_case_1
||
support_case_2
)
{
gpuStream_t
stream
=
dev_ctx_
.
stream
();
gpuStream_t
stream
=
dev_ctx_
.
stream
();
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx_
,
*
d_output
,
d_bias
,
kps
::
IdentityFunctor
<
T
>
(),
{
0
,
1
},
dev_ctx_
,
*
d_output
,
d_bias
,
kps
::
IdentityFunctor
<
T
>
(),
{
0
,
1
},
stream
);
stream
);
}
else
{
}
else
{
...
...
paddle/fluid/operators/kron_op.h
浏览文件 @
f71241b9
...
@@ -305,11 +305,11 @@ struct KronGradOpFunctor {
...
@@ -305,11 +305,11 @@ struct KronGradOpFunctor {
#if defined(__NVCC__) || defined(__HIPCC__)
#if defined(__NVCC__) || defined(__HIPCC__)
auto
stream
=
dev_ctx
.
stream
();
// it is a cuda device_context
auto
stream
=
dev_ctx
.
stream
();
// it is a cuda device_context
if
(
dx
)
{
if
(
dx
)
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx
,
dout_x
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
stream
);
dev_ctx
,
dout_x
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
stream
);
}
}
if
(
dy
)
{
if
(
dy
)
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx
,
dout_y
,
dy
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
stream
);
dev_ctx
,
dout_y
,
dy
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
stream
);
}
}
#else
#else
...
...
paddle/fluid/operators/margin_cross_entropy_op.cu
浏览文件 @
f71241b9
...
@@ -298,7 +298,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -298,7 +298,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
logits_max
=
logits_max
=
ctx
.
AllocateTmpTensor
<
T
,
platform
::
CUDADeviceContext
>
({
N
,
1
},
dev_ctx
);
ctx
.
AllocateTmpTensor
<
T
,
platform
::
CUDADeviceContext
>
({
N
,
1
},
dev_ctx
);
T
*
logits_max_buff
=
logits_max
.
mutable_data
<
T
>
(
place
);
T
*
logits_max_buff
=
logits_max
.
mutable_data
<
T
>
(
place
);
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
MaxFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
MaxFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx
,
softmax_2d
,
&
logits_max
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
dev_ctx
,
softmax_2d
,
&
logits_max
,
kps
::
IdentityFunctor
<
T
>
(),
{
1
},
dev_ctx
.
stream
());
dev_ctx
.
stream
());
...
@@ -320,7 +320,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -320,7 +320,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
sum_exp_logits
=
sum_exp_logits
=
ctx
.
AllocateTmpTensor
<
T
,
platform
::
CUDADeviceContext
>
({
N
,
1
},
dev_ctx
);
ctx
.
AllocateTmpTensor
<
T
,
platform
::
CUDADeviceContext
>
({
N
,
1
},
dev_ctx
);
T
*
sum_exp_logits_buff
=
sum_exp_logits
.
mutable_data
<
T
>
(
place
);
T
*
sum_exp_logits_buff
=
sum_exp_logits
.
mutable_data
<
T
>
(
place
);
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
ExpFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
ExpFunctor
<
T
>>
(
dev_ctx
,
softmax_2d
,
&
sum_exp_logits
,
kps
::
ExpFunctor
<
T
>
(),
{
1
},
dev_ctx
,
softmax_2d
,
&
sum_exp_logits
,
kps
::
ExpFunctor
<
T
>
(),
{
1
},
dev_ctx
.
stream
());
dev_ctx
.
stream
());
...
...
paddle/fluid/operators/mean_op.cu
浏览文件 @
f71241b9
...
@@ -65,7 +65,7 @@ class MeanCUDAKernel : public framework::OpKernel<T> {
...
@@ -65,7 +65,7 @@ class MeanCUDAKernel : public framework::OpKernel<T> {
for
(
decltype
(
rank
)
i
=
0
;
i
<
rank
;
++
i
)
{
for
(
decltype
(
rank
)
i
=
0
;
i
<
rank
;
++
i
)
{
reduce_dims
.
push_back
(
i
);
reduce_dims
.
push_back
(
i
);
}
}
TensorReduce
Functor
Impl
<
T
,
T
,
kernel_primitives
::
AddFunctor
,
Div
>
(
TensorReduceImpl
<
T
,
T
,
kernel_primitives
::
AddFunctor
,
Div
>
(
context
.
cuda_device_context
(),
*
input
,
output
,
Div
(
numel
),
reduce_dims
,
context
.
cuda_device_context
(),
*
input
,
output
,
Div
(
numel
),
reduce_dims
,
stream
);
stream
);
}
}
...
...
paddle/fluid/operators/p_norm_op.cu
浏览文件 @
f71241b9
...
@@ -105,19 +105,19 @@ class PnormCUDAKernel : public framework::OpKernel<T> {
...
@@ -105,19 +105,19 @@ class PnormCUDAKernel : public framework::OpKernel<T> {
using
MT
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
if
(
porder
==
0
)
{
if
(
porder
==
0
)
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
NonzeroFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
NonzeroFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
NonzeroFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
NonzeroFunctor
<
T
>
(),
reduce_axis
,
stream
);
reduce_axis
,
stream
);
}
else
if
(
porder
==
INFINITY
)
{
}
else
if
(
porder
==
INFINITY
)
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
MaxFunctor
,
AbsFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
MaxFunctor
,
AbsFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
AbsFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
AbsFunctor
<
T
>
(),
reduce_axis
,
stream
);
reduce_axis
,
stream
);
}
else
if
(
porder
==
-
INFINITY
)
{
}
else
if
(
porder
==
-
INFINITY
)
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
MinFunctor
,
AbsFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
MinFunctor
,
AbsFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
AbsFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
AbsFunctor
<
T
>
(),
reduce_axis
,
stream
);
reduce_axis
,
stream
);
}
else
{
}
else
{
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
UnsignedPowFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
UnsignedPowFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
ctx
.
cuda_device_context
(),
*
in_x
,
out_norm
,
UnsignedPowFunctor
<
T
>
(
porder
),
reduce_axis
,
stream
);
UnsignedPowFunctor
<
T
>
(
porder
),
reduce_axis
,
stream
);
...
...
paddle/fluid/operators/pool_op.h
浏览文件 @
f71241b9
...
@@ -206,8 +206,7 @@ class PoolKernel : public framework::OpKernel<T> {
...
@@ -206,8 +206,7 @@ class PoolKernel : public framework::OpKernel<T> {
adaptive
)
{
// for adaptive_avg_pool2d && output_size == 1
adaptive
)
{
// for adaptive_avg_pool2d && output_size == 1
#if defined(__HIPCC__) || defined(__NVCC__)
#if defined(__HIPCC__) || defined(__NVCC__)
auto
stream
=
dev_ctx
.
stream
();
auto
stream
=
dev_ctx
.
stream
();
TensorReduceFunctorImpl
<
T
,
T
,
kps
::
AddFunctor
,
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
DivideFunctor
<
T
>>
(
kps
::
DivideFunctor
<
T
>>
(
dev_ctx
,
*
in_x
,
out
,
kps
::
DivideFunctor
<
T
>
(
reduce_num
),
dev_ctx
,
*
in_x
,
out
,
kps
::
DivideFunctor
<
T
>
(
reduce_num
),
reduce_dim
,
stream
);
reduce_dim
,
stream
);
#else // for cpu
#else // for cpu
...
...
paddle/fluid/operators/prelu_op.cu
浏览文件 @
f71241b9
...
@@ -185,7 +185,7 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
...
@@ -185,7 +185,7 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
reduce_dims
.
push_back
(
i
);
reduce_dims
.
push_back
(
i
);
}
}
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
context
.
cuda_device_context
(),
dalpha_tmp
,
dalpha
,
context
.
cuda_device_context
(),
dalpha_tmp
,
dalpha
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
...
...
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
浏览文件 @
f71241b9
...
@@ -39,11 +39,11 @@ namespace operators {
...
@@ -39,11 +39,11 @@ namespace operators {
template
<
typename
Tx
,
typename
Ty
,
template
<
typename
>
class
ReduceOp
,
template
<
typename
Tx
,
typename
Ty
,
template
<
typename
>
class
ReduceOp
,
typename
TransformOp
>
typename
TransformOp
>
void
TensorReduce
Functor
Impl
(
const
platform
::
CUDADeviceContext
&
dev_ctx
,
void
TensorReduceImpl
(
const
platform
::
CUDADeviceContext
&
dev_ctx
,
const
framework
::
Tensor
&
x
,
framework
::
Tensor
*
y
,
const
framework
::
Tensor
&
x
,
framework
::
Tensor
*
y
,
const
TransformOp
&
transform
,
const
TransformOp
&
transform
,
const
std
::
vector
<
int
>&
origin_reduce_dims
,
const
std
::
vector
<
int
>&
origin_reduce_dims
,
gpuStream_t
stream
)
{
gpuStream_t
stream
)
{
y
->
mutable_data
<
Ty
>
(
x
.
place
());
y
->
mutable_data
<
Ty
>
(
x
.
place
());
pten
::
kernels
::
TensorReduceFunctorImpl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
pten
::
kernels
::
TensorReduceFunctorImpl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
...
...
paddle/fluid/operators/renorm_op.cu
浏览文件 @
f71241b9
...
@@ -155,7 +155,7 @@ class CUDARenormKernel : public framework::OpKernel<T> {
...
@@ -155,7 +155,7 @@ class CUDARenormKernel : public framework::OpKernel<T> {
ElementwiseType
::
kUnary
,
MT
,
T
,
UnsignedPowFunctor
<
MT
,
T
>>
(
ElementwiseType
::
kUnary
,
MT
,
T
,
UnsignedPowFunctor
<
MT
,
T
>>
(
cuda_ctx
,
ins
,
&
outs
,
func
);
cuda_ctx
,
ins
,
&
outs
,
func
);
std
::
vector
<
int
>
reduce_axis
=
{
0
,
2
};
std
::
vector
<
int
>
reduce_axis
=
{
0
,
2
};
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
cuda_ctx
,
pow_value
,
&
dim_value
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
cuda_ctx
,
pow_value
,
&
dim_value
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
stream
);
stream
);
RenormKernelFunc3
<
T
><<<
grid2
,
block2
,
0
,
stream
>>>
(
RenormKernelFunc3
<
T
><<<
grid2
,
block2
,
0
,
stream
>>>
(
...
@@ -213,10 +213,10 @@ class CUDAGradRenormKernel : public framework::OpKernel<T> {
...
@@ -213,10 +213,10 @@ class CUDAGradRenormKernel : public framework::OpKernel<T> {
mul_value
.
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
numel
,
dimension_each
,
p
,
mul_value
.
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
numel
,
dimension_each
,
p
,
dim_divisor
);
dim_divisor
);
std
::
vector
<
int
>
reduce_axis
=
{
0
,
2
};
std
::
vector
<
int
>
reduce_axis
=
{
0
,
2
};
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
pow_value
,
&
dim_value
,
ctx
.
cuda_device_context
(),
pow_value
,
&
dim_value
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
stream
);
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
stream
);
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
mul_value
,
&
weight_derivative
,
ctx
.
cuda_device_context
(),
mul_value
,
&
weight_derivative
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
stream
);
kps
::
IdentityFunctor
<
T
>
(),
reduce_axis
,
stream
);
RenormGradKernelFunc2
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
RenormGradKernelFunc2
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
...
...
paddle/fluid/operators/solve_op.h
浏览文件 @
f71241b9
...
@@ -45,7 +45,7 @@ void ReduceSumForSolve(const Tensor* input, Tensor* output,
...
@@ -45,7 +45,7 @@ void ReduceSumForSolve(const Tensor* input, Tensor* output,
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
{
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
{
#if defined(__NVCC__) || defined(__HIPCC__)
#if defined(__NVCC__) || defined(__HIPCC__)
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
*
input
,
output
,
kps
::
IdentityFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
*
input
,
output
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
reduce_dims
,
stream
);
#else
#else
...
...
paddle/fluid/operators/trace_op.cu
浏览文件 @
f71241b9
...
@@ -39,7 +39,7 @@ class TraceCUDAKernel : public framework::OpKernel<T> {
...
@@ -39,7 +39,7 @@ class TraceCUDAKernel : public framework::OpKernel<T> {
auto
stream
=
context
.
cuda_device_context
().
stream
();
auto
stream
=
context
.
cuda_device_context
().
stream
();
std
::
vector
<
int
>
reduce_dims
;
std
::
vector
<
int
>
reduce_dims
;
reduce_dims
.
push_back
(
out
->
dims
().
size
());
reduce_dims
.
push_back
(
out
->
dims
().
size
());
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
context
.
cuda_device_context
(),
diag
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
context
.
cuda_device_context
(),
diag
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
reduce_dims
,
stream
);
}
else
{
}
else
{
...
...
paddle/fluid/operators/triangular_solve_op.cu
浏览文件 @
f71241b9
...
@@ -44,7 +44,7 @@ class MatrixReduceSumFunctor<platform::CUDADeviceContext, T> {
...
@@ -44,7 +44,7 @@ class MatrixReduceSumFunctor<platform::CUDADeviceContext, T> {
}
}
}
}
gpuStream_t
stream
=
ctx
.
cuda_device_context
().
stream
();
gpuStream_t
stream
=
ctx
.
cuda_device_context
().
stream
();
TensorReduce
Functor
Impl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
.
cuda_device_context
(),
in
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
ctx
.
cuda_device_context
(),
in
,
out
,
kps
::
IdentityFunctor
<
T
>
(),
out_reduce_dims
,
stream
);
out_reduce_dims
,
stream
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录