BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 74894cd7 (unverified)
Authored Mar 31, 2022 by csy0225; committed via GitHub on Mar 31, 2022.
fix conflict (#40851)
Parent: e559fe41

Showing 44 changed files with 2,641 additions and 684 deletions (+2641, -684).
Changed files:

paddle/fluid/operators/range_op.cc            +8   -49
paddle/fluid/operators/range_op.cu            +0   -61
paddle/fluid/operators/range_op_npu_test.cc   +1   -1
paddle/fluid/operators/stack_op.cc            +8   -62
paddle/fluid/operators/stack_op.cu            +0   -207
paddle/fluid/operators/stack_op_npu.cc        +1   -1
paddle/fluid/operators/stack_op_xpu.cc        +1   -1
paddle/fluid/operators/unique_op.cc           +37  -76
paddle/fluid/operators/unstack_op.cc          +8   -51
paddle/fluid/operators/unstack_op.h           +0   -174
paddle/fluid/operators/unstack_op_npu.cc      +1   -1
paddle/phi/core/utils/data_type.h             +21  -0
paddle/phi/infermeta/multiary.cc              +46  -0
paddle/phi/infermeta/multiary.h               +4   -0
paddle/phi/infermeta/ternary.cc               +50  -0
paddle/phi/infermeta/ternary.h                +5   -0
paddle/phi/infermeta/unary.cc                 +126 -0
paddle/phi/infermeta/unary.h                  +28  -0
paddle/phi/kernels/cpu/range_kernel.cc        +45  -0
paddle/phi/kernels/cpu/stack_grad_kernel.cc   +59  -0
paddle/phi/kernels/cpu/stack_kernel.cc        +62  -0
paddle/phi/kernels/cpu/unique_kernel.cc       +131 -0
paddle/phi/kernels/cpu/unstack_grad_kernel.cc +27  -0
paddle/phi/kernels/cpu/unstack_kernel.cc      +22  -0
paddle/phi/kernels/funcs/range_function.h     +49  -0
paddle/phi/kernels/funcs/stack_functor.h      +83  -0
paddle/phi/kernels/funcs/unique_functor.h     +426 -0
paddle/phi/kernels/gpu/range_kernel.cu        +57  -0
paddle/phi/kernels/gpu/stack_grad_kernel.cu   +143 -0
paddle/phi/kernels/gpu/stack_kernel.cu        +113 -0
paddle/phi/kernels/gpu/unique_kernel.cu       +615 -0
paddle/phi/kernels/gpu/unstack_grad_kernel.cu +29  -0
paddle/phi/kernels/gpu/unstack_kernel.cu      +29  -0
paddle/phi/kernels/impl/unstack_grad_kernel_impl.h +70 -0
paddle/phi/kernels/impl/unstack_kernel_impl.h +62  -0
paddle/phi/kernels/range_kernel.h             +28  -0
paddle/phi/kernels/stack_grad_kernel.h        +27  -0
paddle/phi/kernels/stack_kernel.h             +27  -0
paddle/phi/kernels/unique_kernel.h            +48  -0
paddle/phi/kernels/unstack_grad_kernel.h      +27  -0
paddle/phi/kernels/unstack_kernel.h           +28  -0
paddle/phi/ops/compat/stack_sig.cc            +23  -0
paddle/phi/ops/compat/unique_sig.cc           +42  -0
paddle/phi/ops/compat/unstack_sig.cc          +24  -0
paddle/fluid/operators/range_op.cc
@@ -14,6 +14,10 @@ limitations under the License. */
 #include "paddle/fluid/operators/range_op.h"
 #include <string>
+
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/ternary.h"

 namespace paddle {
 namespace operators {
@@ -22,51 +26,6 @@ class RangeOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext *ctx) const override {
-    if (ctx->HasInput("Start")) {
-      auto s_dims = ctx->GetInputDim("Start");
-      PADDLE_ENFORCE_EQ(
-          s_dims.size(), 1,
-          platform::errors::InvalidArgument(
-              "The dim of the shape of Input(Start) should be 1, but got %d",
-              s_dims.size()));
-      PADDLE_ENFORCE_EQ(s_dims[0], 1,
-                        platform::errors::InvalidArgument(
-                            "The first dim of the shape of Input(Start) should "
-                            "be 1, but got %d",
-                            s_dims[0]));
-    }
-    if (ctx->HasInput("End")) {
-      auto e_dims = ctx->GetInputDim("End");
-      PADDLE_ENFORCE_EQ(
-          e_dims.size(), 1,
-          platform::errors::InvalidArgument(
-              "The dim of the shape of Input(End) should be 1, but got %d",
-              e_dims.size()));
-      PADDLE_ENFORCE_EQ(e_dims[0], 1,
-                        platform::errors::InvalidArgument(
-                            "The first dim of the shape of "
-                            "Input(End) should be 1, but got %d",
-                            e_dims[0]));
-    }
-    if (ctx->HasInput("Step")) {
-      auto step_dims = ctx->GetInputDim("Step");
-      PADDLE_ENFORCE_EQ(
-          step_dims.size(), 1,
-          platform::errors::InvalidArgument(
-              "The dim of the shape of Input(Step) should be 1, but got %d",
-              step_dims.size()));
-      PADDLE_ENFORCE_EQ(step_dims[0], 1,
-                        platform::errors::InvalidArgument(
-                            "The first dim of the shape of Input(Step) should "
-                            "be 1, but got %d",
-                            step_dims[0]));
-    }
-    ctx->SetOutputDim("Out", {-1});
-  }
-
  protected:
   framework::OpKernelType GetKernelTypeForVar(
       const std::string &var_name, const framework::Tensor &tensor,
@@ -101,7 +60,7 @@ class RangeOpMaker : public framework::OpProtoAndCheckerMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(range, ops::RangeOp, ops::RangeOpMaker);
-REGISTER_OP_CPU_KERNEL(range, ops::CPURangeKernel<int>,
-                       ops::CPURangeKernel<float>, ops::CPURangeKernel<double>,
-                       ops::CPURangeKernel<int64_t>);
+DECLARE_INFER_SHAPE_FUNCTOR(range, RangeInferMetaFunctor,
+                            PD_INFER_META(phi::RangeInferMeta));
+REGISTER_OP_WITHOUT_GRADIENT(range, ops::RangeOp, ops::RangeOpMaker,
+                             RangeInferMetaFunctor);
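The pattern in this first file repeats across the commit: the operator's hand-written InferShape is deleted and a phi InferMeta functor is registered instead via DECLARE_INFER_SHAPE_FUNCTOR / PD_INFER_META. A minimal standalone sketch of the dispatch idea, with toy types standing in for Paddle's real MetaTensor and registry machinery (everything here is illustrative, not Paddle's API):

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Toy stand-in for phi::MetaTensor: carries only dims.
struct MetaTensor {
  std::vector<int64_t> dims;
};

// Mirrors phi::RangeInferMeta's contract: range's output length depends on
// runtime start/end/step values, so the compile-time shape is just {-1}.
void RangeInferMeta(const MetaTensor& start, const MetaTensor& end,
                    const MetaTensor& step, MetaTensor* out) {
  out->dims = {-1};
}

int main() {
  MetaTensor start{{1}}, end{{1}}, step{{1}}, out;
  // The registered functor is essentially a bound call like this one.
  std::function<void(const MetaTensor&, const MetaTensor&, const MetaTensor&,
                     MetaTensor*)> functor = RangeInferMeta;
  functor(start, end, step, &out);
  std::cout << out.dims[0] << "\n";  // prints -1: shape resolved at runtime
}

The point of the indirection is that one free function (phi::RangeInferMeta) now serves both the legacy operator registry and the new phi kernels.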
paddle/fluid/operators/range_op.cu (deleted, 100644 → 0)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/range_op.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"

namespace paddle {
namespace operators {

template <typename T>
__global__ void RangeKernel(T start, T step, int64_t size, T* out) {
  CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}

template <typename T>
class CUDARangeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* start_t = context.Input<framework::Tensor>("Start");
    auto* end_t = context.Input<framework::Tensor>("End");
    auto* step_t = context.Input<framework::Tensor>("Step");
    auto* out = context.Output<framework::Tensor>("Out");

    T start = GetValue<T>(start_t);
    T end = GetValue<T>(end_t);
    T step = GetValue<T>(step_t);

    int64_t size = 0;
    GetSize(start, end, step, &size);
    out->Resize(phi::make_ddim({size}));
    T* out_data = out->mutable_data<T>(context.GetPlace());

    auto stream = context.cuda_device_context().stream();
    int block = std::min(size, static_cast<int64_t>(256));
    int grid = (size + block - 1) / block;
    RangeKernel<T><<<grid, block, 0, stream>>>(start, step, size, out_data);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(range, ops::CUDARangeKernel<int>,
                        ops::CUDARangeKernel<int64_t>,
                        ops::CUDARangeKernel<float>,
                        ops::CUDARangeKernel<double>);
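For reference, the launch configuration in the deleted CUDA kernel is plain ceiling division. The same arithmetic in standalone C++ (assuming the kernel's 256-thread cap):

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  int64_t size = 1000;  // number of output elements
  // Same math as the deleted CUDARangeKernel: at most 256 threads per block,
  // and enough blocks to cover every element exactly once.
  int block = std::min(size, static_cast<int64_t>(256));
  int grid = (size + block - 1) / block;
  std::cout << "block=" << block << " grid=" << grid << "\n";  // block=256 grid=4
}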
paddle/fluid/operators/range_op_npu_test.cc
@@ -30,7 +30,7 @@ limitations under the License. */

 namespace f = paddle::framework;
 namespace p = paddle::platform;

-USE_OP(range);
+USE_OP_ITSELF(range);
 USE_OP_DEVICE_KERNEL(range, NPU);

 template <typename T>
paddle/fluid/operators/stack_op.cc
@@ -12,9 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "paddle/fluid/operators/stack_op.h"
 #include <memory>
 #include <vector>
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/multiary.h"

 namespace plat = paddle::platform;
 namespace ops = paddle::operators;
@@ -26,52 +29,6 @@ class StackOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_GT(ctx->Inputs("X").size(), 0,
-                      platform::errors::InvalidArgument(
-                          "Number of Inputs(X) must be larger than 0, but"
-                          " received value is:%d.",
-                          ctx->Inputs("X").size()));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("Y"), true,
-                      platform::errors::InvalidArgument(
-                          "Output(Y) of stack_op should not be null."));
-    auto input_dims = ctx->GetInputsDim("X");
-    for (size_t i = 1; i < input_dims.size(); ++i) {
-      PADDLE_ENFORCE_EQ(input_dims[i], input_dims[0],
-                        platform::errors::InvalidArgument(
-                            "Dims of all Inputs(X) must be the same, but"
-                            " received input %d dim is:%d not equal to input 0"
-                            " dim:%d.",
-                            i, input_dims[i], input_dims[0]));
-    }
-
-    // Only lod of X[0] would be shared with Y
-    ctx->ShareLoD("X", /*->*/ "Y");
-
-    int axis = ctx->Attrs().Get<int>("axis");
-    int rank = input_dims[0].size();
-    PADDLE_ENFORCE_GE(
-        axis, -(rank + 1),
-        platform::errors::InvalidArgument(
-            "Attr(axis) must be inside [-(rank+1), rank+1), where rank = %d, "
-            "but received axis is:%d.",
-            rank, axis));
-    PADDLE_ENFORCE_LT(
-        axis, rank + 1,
-        platform::errors::InvalidArgument(
-            "Attr(axis) must be inside [-(rank+1), rank+1), where rank = %d, "
-            "but received axis is:%d",
-            rank, axis));
-
-    if (axis < 0) axis += (rank + 1);
-
-    auto vec = phi::vectorize<int>(input_dims[0]);
-    vec.insert(vec.begin() + axis, input_dims.size());
-    ctx->SetOutputDim("Y", phi::make_ddim(vec));
-  }
-
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type =
@@ -168,21 +125,10 @@ class StackGradOpMaker : public framework::SingleGradOpMaker<T> {
 }  // namespace operators
 }  // namespace paddle

+DECLARE_INFER_SHAPE_FUNCTOR(stack, StackInferMetaFunctor,
+                            PD_INFER_META(phi::StackInferMeta));
 REGISTER_OPERATOR(stack, ops::StackOp, ops::StackOpMaker,
                   ops::StackGradOpMaker<paddle::framework::OpDesc>,
-                  ops::StackGradOpMaker<paddle::imperative::OpBase>);
+                  ops::StackGradOpMaker<paddle::imperative::OpBase>,
+                  StackInferMetaFunctor);
 REGISTER_OPERATOR(stack_grad, ops::StackOpGrad);
-
-REGISTER_OP_CPU_KERNEL(stack, ops::StackKernel<plat::CPUDeviceContext, float>,
-                       ops::StackKernel<plat::CPUDeviceContext, double>,
-                       ops::StackKernel<plat::CPUDeviceContext, int>,
-                       ops::StackKernel<plat::CPUDeviceContext, int64_t>,
-                       ops::StackKernel<plat::CPUDeviceContext,
-                                        paddle::platform::bfloat16>);
-
-REGISTER_OP_CPU_KERNEL(
-    stack_grad, ops::StackGradKernel<plat::CPUDeviceContext, float>,
-    ops::StackGradKernel<plat::CPUDeviceContext, double>,
-    ops::StackGradKernel<plat::CPUDeviceContext, int>,
-    ops::StackGradKernel<plat::CPUDeviceContext, int64_t>,
-    ops::StackGradKernel<plat::CPUDeviceContext, paddle::platform::bfloat16>);
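The shape rule that moves from StackOp::InferShape into phi::StackInferMeta is unchanged: normalize a negative axis against rank + 1, then insert the number of inputs at that axis. A small standalone check of the rule:

#include <iostream>
#include <vector>

// Output dims of stacking `n` tensors of shape `dims` along `axis`,
// as computed in the (now deleted) StackOp::InferShape above.
std::vector<int> StackOutDims(std::vector<int> dims, int n, int axis) {
  int rank = static_cast<int>(dims.size());
  if (axis < 0) axis += rank + 1;       // axis lives in [-(rank+1), rank+1)
  dims.insert(dims.begin() + axis, n);  // new dimension of length n
  return dims;
}

int main() {
  // Stacking four 2x3 tensors along axis 1 yields shape 2x4x3.
  for (int d : StackOutDims({2, 3}, /*n=*/4, /*axis=*/1)) std::cout << d << " ";
  std::cout << "\n";  // 2 4 3
}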
paddle/fluid/operators/stack_op.cu (deleted, 100644 → 0)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// (Apache-2.0 license header as above.)

#include <algorithm>
#include <limits>
#include <vector>
#include "paddle/fluid/operators/stack_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"

namespace plat = paddle::platform;
namespace ops = paddle::operators;

namespace paddle {
namespace operators {

template <typename T, typename IntType>
__global__ void StackCUDAKernel(T** input_ptrs, int split_size, int rows,
                                int cols, T* __restrict__ output) {
  IntType grid_x = blockIdx.x * blockDim.x + threadIdx.x;

  for (; grid_x < cols; grid_x += blockDim.x * gridDim.x) {
    IntType grid_y = blockIdx.y * blockDim.y + threadIdx.y;

    IntType split = grid_x / split_size;
    const T* input_ptr = input_ptrs[split];
    IntType col_offset = grid_x % split_size;
#pragma unroll
    for (; grid_y < rows; grid_y += blockDim.y * gridDim.y) {
      output[grid_y * cols + grid_x] =
          input_ptr[grid_y * split_size + col_offset];
    }
  }
}

template <typename T>
class StackGPUKernel : public framework::OpKernel<T> {
  using Tensor = framework::LoDTensor;

 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto x = ctx.MultiInput<Tensor>("X");
    auto* y = ctx.Output<Tensor>("Y");

    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += (x[0]->dims().size() + 1);

    int n = static_cast<int>(x.size());
    auto* y_data = y->mutable_data<T>(ctx.GetPlace());
    std::vector<const T*> x_datas(n);
    for (int i = 0; i < n; i++) {
      x_datas[i] = x[i]->data<T>();
    }

    auto& dev_ctx = ctx.template device_context<plat::CUDADeviceContext>();
    auto tmp_x_data = memory::Alloc(dev_ctx, x_datas.size() * sizeof(T*));
    memory::Copy(dev_ctx.GetPlace(), tmp_x_data->ptr(), platform::CPUPlace(),
                 reinterpret_cast<void*>(x_datas.data()),
                 x_datas.size() * sizeof(T*), dev_ctx.stream());

    // Split x dim from axis to matrix
    int x_row = 1, x_col = 1;
    for (int i = 0; i < axis; ++i) {
      x_row *= x[0]->dims()[i];
    }
    x_col = x[0]->numel() / x_row;
    int out_col = x_col * n;

    auto config = GetGpuLaunchConfig2D(dev_ctx, out_col, x_row);

    if (y->numel() < std::numeric_limits<int32_t>::max()) {
      StackCUDAKernel<T, int32_t><<<config.block_per_grid,
                                    config.thread_per_block, 0,
                                    dev_ctx.stream()>>>(
          reinterpret_cast<T**>(tmp_x_data->ptr()), x_col, x_row, out_col,
          y_data);
    } else {
      StackCUDAKernel<T, int64_t><<<config.block_per_grid,
                                    config.thread_per_block, 0,
                                    dev_ctx.stream()>>>(
          reinterpret_cast<T**>(tmp_x_data->ptr()), x_col, x_row, out_col,
          y_data);
    }
  }
};

template <typename T, typename IntType>
__global__ void UnStackHelperCUDAKernel(const T* __restrict__ input,
                                        int pre_dim_size, int split_dim_size,
                                        int suf_dim_size, int num_split,
                                        T** output_ptrs) {
  assert(blockDim.y == 1);
  assert(blockDim.z == 1);
  // In this case they are equal
  assert(split_dim_size % num_split == 0);

  IntType size = pre_dim_size * split_dim_size * suf_dim_size;
  IntType each_dim_size = split_dim_size / num_split;

  for (IntType offset = blockIdx.x * blockDim.x + threadIdx.x; offset < size;
       offset += blockDim.x * gridDim.x) {
    IntType i = offset / (split_dim_size * suf_dim_size);
    IntType j = (offset % (split_dim_size * suf_dim_size)) / suf_dim_size;
    IntType k = offset % suf_dim_size;

    T* output = output_ptrs[j / each_dim_size];
    if (output == nullptr) {
      return;
    }
    IntType output_ind = i * each_dim_size * suf_dim_size +
                         (j % each_dim_size) * suf_dim_size + k;
    *(output + output_ind) = input[offset];
  }
}

template <typename T>
class StackGradGPUKernel : public framework::OpKernel<T> {
  using Tensor = framework::LoDTensor;

 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto dx = ctx.MultiOutput<Tensor>(framework::GradVarName("X"));
    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += dy->dims().size();

    int n = dy->dims()[axis];
    PADDLE_ENFORCE_EQ(n, dx.size(),
                      platform::errors::InvalidArgument(
                          "Output dx size should be equal to n, but"
                          " received n is:%d dx size is:%d.",
                          n, dx.size()));

    // dx is output, so save each data address, then copy each dy into dx_data
    std::vector<T*> outputs(n);
    auto out_var_names = ctx.OutputNames(framework::GradVarName("X"));
    for (size_t j = 0; j < dx.size(); ++j) {
      if (dx[j] == nullptr) {
        outputs[j] = nullptr;
      }
      if (out_var_names[j] != framework::kEmptyVarName &&
          dx[j]->numel() != 0UL) {
        T* ptr = dx[j]->mutable_data<T>(ctx.GetPlace());
        outputs[j] = ptr;
      } else {
        outputs[j] = nullptr;
      }
    }
    auto dy_data = dy->data<T>();
    // each dx should have same shape
    int dy_pre = 1, dy_suf = 1;
    auto dy_dims = dy->dims();
    int split_dim = n;
    for (int i = 0; i < axis; ++i) {
      dy_pre *= dy_dims[i];
    }
    dy_suf = dy->numel() / (split_dim * dy_pre);

    auto& dev_ctx = ctx.template device_context<plat::CUDADeviceContext>();
    auto tmp_out_data = memory::Alloc(dev_ctx, outputs.size() * sizeof(T*));
    memory::Copy(dev_ctx.GetPlace(), tmp_out_data->ptr(), platform::CPUPlace(),
                 reinterpret_cast<void*>(outputs.data()),
                 outputs.size() * sizeof(T*), dev_ctx.stream());

    auto config = GetGpuLaunchConfig1D(dev_ctx, dy_pre * split_dim * dy_suf);

    if (dy->numel() < std::numeric_limits<int32_t>::max()) {
      UnStackHelperCUDAKernel<T, int32_t><<<config.block_per_grid.x,
                                            config.thread_per_block.x, 0,
                                            dev_ctx.stream()>>>(
          dy_data, dy_pre, split_dim, dy_suf, split_dim,
          reinterpret_cast<T**>(tmp_out_data->ptr()));
    } else {
      UnStackHelperCUDAKernel<T, int64_t><<<config.block_per_grid.x,
                                            config.thread_per_block.x, 0,
                                            dev_ctx.stream()>>>(
          dy_data, dy_pre, split_dim, dy_suf, split_dim,
          reinterpret_cast<T**>(tmp_out_data->ptr()));
    }
  }
};

}  // namespace operators
}  // namespace paddle

REGISTER_OP_CUDA_KERNEL(stack, ops::StackGPUKernel<float>,
                        ops::StackGPUKernel<double>, ops::StackGPUKernel<int>,
                        ops::StackGPUKernel<int64_t>,
                        ops::StackGPUKernel<plat::float16>,
                        ops::StackGPUKernel<plat::bfloat16>);

REGISTER_OP_CUDA_KERNEL(stack_grad, ops::StackGradGPUKernel<float>,
                        ops::StackGradGPUKernel<double>,
                        ops::StackGradGPUKernel<int>,
                        ops::StackGradGPUKernel<int64_t>,
                        ops::StackGradGPUKernel<plat::float16>,
                        ops::StackGradGPUKernel<plat::bfloat16>);
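The deleted StackCUDAKernel flattens the problem into a rows x cols matrix with cols = n * split_size; output column c reads column c % split_size of input c / split_size. A CPU reference of that indexing (a sketch for illustration, not the CUDA code itself):

#include <iostream>
#include <vector>

// CPU reference for the indexing in the deleted StackCUDAKernel.
void StackRef(const std::vector<const float*>& inputs, int split_size,
              int rows, int cols, float* output) {
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      int split = c / split_size;       // which input tensor
      int col_offset = c % split_size;  // column inside that tensor
      output[r * cols + c] = inputs[split][r * split_size + col_offset];
    }
  }
}

int main() {
  // Two 2x2 inputs stacked along axis 1: rows=2, split_size=2, cols=4.
  float a[] = {1, 2, 3, 4}, b[] = {5, 6, 7, 8}, out[8];
  StackRef({a, b}, 2, 2, 4, out);
  for (float v : out) std::cout << v << " ";  // 1 2 5 6 3 4 7 8
  std::cout << "\n";
}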
paddle/fluid/operators/stack_op_npu.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/stack_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"

 namespace paddle {
paddle/fluid/operators/stack_op_xpu.cc
@@ -13,9 +13,9 @@
 // limitations under the License.

 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/stack_op.h"
 #include <string>
 #include <vector>
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/concat_op.h"
 #include "paddle/fluid/platform/device/xpu/xpu_header.h"
paddle/fluid/operators/unique_op.cc
@@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/unique_op.h"
 #include "paddle/fluid/framework/op_version_registry.h"
+
+#include <memory>
+
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"

 namespace paddle {
 namespace operators {
@@ -25,62 +29,54 @@ class UniqueOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unique");
     OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "unique");
-    auto in_dims = ctx->GetInputDim("X");
-    if (!ctx->Attrs().Get<bool>("is_sorted")) {
-      OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index", "unique");
-      PADDLE_ENFORCE_EQ(in_dims.size(), 1,
-                        platform::errors::InvalidArgument(
-                            "The Input(X) should be 1-D Tensor, "
-                            "But now the dims of Input(X) is %d.",
-                            in_dims.size()));
-      ctx->SetOutputDim("Out", {-1});
-      ctx->SetOutputDim("Index", in_dims);
-      return;
-    }
     bool return_index = ctx->Attrs().Get<bool>("return_index");
     bool return_inverse = ctx->Attrs().Get<bool>("return_inverse");
     bool return_counts = ctx->Attrs().Get<bool>("return_counts");
     auto axis_vec = ctx->Attrs().Get<std::vector<int>>("axis");
+    auto data_type = static_cast<phi::DataType>(
+        static_cast<framework::proto::VarType::Type>(
+            ctx->Attrs().Get<int>("dtype")));
+
+    // Construct MetaTensor for InferMeta Func
+    using CompatMetaTensor = framework::CompatMetaTensor;
+    CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime());
+    CompatMetaTensor out(ctx->GetOutputVarPtrs("Out")[0], ctx->IsRuntime());
+    std::unique_ptr<CompatMetaTensor> indices(nullptr);
+    std::unique_ptr<CompatMetaTensor> index(nullptr);
+    std::unique_ptr<CompatMetaTensor> counts(nullptr);
     if (return_index) {
       OP_INOUT_CHECK(ctx->HasOutput("Indices"), "Output", "Indices", "unique");
+      indices = std::move(std::unique_ptr<CompatMetaTensor>(
+          new CompatMetaTensor(ctx->GetOutputVarPtrs("Indices")[0],
+                               ctx->IsRuntime())));
     }
     if (return_inverse) {
       OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index", "unique");
+      index = std::move(std::unique_ptr<CompatMetaTensor>(
+          new CompatMetaTensor(ctx->GetOutputVarPtrs("Index")[0],
+                               ctx->IsRuntime())));
     }
     if (return_counts) {
       OP_INOUT_CHECK(ctx->HasOutput("Counts"), "Output", "Counts", "unique");
+      counts = std::move(std::unique_ptr<CompatMetaTensor>(
+          new CompatMetaTensor(ctx->GetOutputVarPtrs("Counts")[0],
+                               ctx->IsRuntime())));
     }
-    if (axis_vec.empty()) {
-      ctx->SetOutputDim("Out", {-1});
-      if (return_inverse) {
-        ctx->SetOutputDim("Index", {phi::product(in_dims)});
-      }
+    bool is_sorted = ctx->Attrs().Get<bool>("is_sorted");
+    if (is_sorted) {
+      phi::UniqueInferMeta(x, return_index, return_inverse, return_counts,
                           axis_vec, data_type, &out, indices.get(),
                           index.get(), counts.get());
     } else {
-      int axis = axis_vec[0];
-      if (axis < 0) {
-        axis += in_dims.size();
-      }
-      PADDLE_ENFORCE_LT(
-          axis, in_dims.size(),
-          platform::errors::InvalidArgument("The axis(%d) should be less than "
-                                            "the dimension size(%d) of x.",
-                                            axis, in_dims.size()));
-      auto out_dims = in_dims;
-      out_dims[axis] = -1;
-      ctx->SetOutputDim("Out", out_dims);
-      if (return_inverse) {
-        ctx->SetOutputDim("Index", {in_dims[axis]});
+      OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index", "unique");
+      if (index == nullptr) {
+        index = std::move(std::unique_ptr<CompatMetaTensor>(
+            new CompatMetaTensor(ctx->GetOutputVarPtrs("Index")[0],
+                                 ctx->IsRuntime())));
       }
+      phi::UniqueRawInferMeta(x, return_index, return_inverse, return_counts,
                              axis_vec, data_type, is_sorted, &out,
                              indices.get(), index.get(), counts.get());
     }
-    if (return_index) {
-      ctx->SetOutputDim("Indices", {-1});
-    }
-    if (return_counts) {
-      ctx->SetOutputDim("Counts", {-1});
-    }
   }
@@ -152,40 +148,5 @@ class UniqueOpMaker : public framework::OpProtoAndCheckerMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(unique, ops::UniqueOp, ops::UniqueOpMaker);
-REGISTER_OP_CPU_KERNEL(
-    unique, ops::UniqueKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::UniqueKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::UniqueKernel<paddle::platform::CPUDeviceContext, int32_t>,
-    ops::UniqueKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_VERSION(unique).AddCheckpoint(
     R"ROC(
     Upgrade unique, add 2 outputs [Indices, Counts] and 5 attribute
     [return_index, return_inverse, return_counts, axis, is_sorted].
   )ROC",
     paddle::framework::compatible::OpVersionDesc()
         .NewOutput("Indices",
                    "The indices of the input tensor that result in the "
                    "unique tensor.")
         .NewOutput("Counts", "The counts for each unique element.")
         .NewAttr("return_index",
                  "If True, also return the indices of the input"
                  " tensor that result in the unique Tensor.",
                  false)
         .NewAttr("return_inverse",
                  "If True, also return the indices for where elements"
                  " in the original input ended up in the returned unique "
                  "tensor.",
                  false)
         .NewAttr("return_counts",
                  "If True, also return the counts for each unique element.",
                  false)
         .NewAttr("axis",
                  "The axis to apply unique. If None, the input will be "
                  "flattened.",
                  std::vector<int>{})
         .NewAttr("is_sorted",
                  "If True, the unique elements of X are in ascending order."
                  "Otherwise, the unique elements are not sorted.",
                  false));
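The CompatMetaTensor construction above is the bridge between the two systems: it wraps the fluid InferShapeContext's variables so the shared phi InferMeta functions can write shapes through them. An illustrative-only toy of the adapter idea (names and types here are invented, not Paddle's real compatibility layer):

#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for a legacy fluid variable that stores a shape.
struct LegacyVar {
  std::vector<int64_t> shape;
};

// Adapter exposing the phi-style MetaTensor interface over the legacy slot.
class CompatMetaTensor {
 public:
  explicit CompatMetaTensor(LegacyVar* var) : var_(var) {}
  void set_dims(const std::vector<int64_t>& dims) { var_->shape = dims; }

 private:
  LegacyVar* var_;
};

// A phi-style InferMeta function only sees the adapter, not the legacy var.
void ToyInferMeta(CompatMetaTensor* out) { out->set_dims({-1}); }

int main() {
  LegacyVar y;
  CompatMetaTensor out(&y);
  ToyInferMeta(&out);               // phi-style function writes the fluid var
  std::cout << y.shape[0] << "\n";  // -1
}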
paddle/fluid/operators/unstack_op.cc
@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/unstack_op.h"
 #include <memory>
 #include <string>
 #include <vector>
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"

 namespace paddle {
 namespace operators {
@@ -25,43 +27,6 @@ namespace operators {
 class UnStackOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "UnStack");
-    int axis = ctx->Attrs().Get<int>("axis");
-    int num = ctx->Attrs().Get<int>("num");
-    auto x_dim = ctx->GetInputDim("X");
-    int rank = x_dim.size();
-    PADDLE_ENFORCE_GE(axis, -rank,
-                      platform::errors::InvalidArgument(
-                          "The attribute axis is out of range, it must be "
-                          "inside [-rank, rank), where rank = %d",
-                          rank));
-    PADDLE_ENFORCE_LT(axis, rank,
-                      platform::errors::InvalidArgument(
-                          "The attribute axis is out of range, it must be "
-                          "inside [-rank, rank), where rank = %d",
-                          rank));
-    if (axis < 0) axis += rank;
-
-    PADDLE_ENFORCE_EQ(ctx->Outputs("Y").size(), static_cast<size_t>(num),
-                      platform::errors::InvalidArgument(
-                          "Number of Outputs(Y) is wrong. Got %d , but it must "
-                          "equal to attribute num which is %d.",
-                          ctx->Outputs("Y").size(), static_cast<size_t>(num)));
-    if (x_dim[axis] > 0) {
-      PADDLE_ENFORCE_EQ(
-          num, x_dim[axis],
-          platform::errors::InvalidArgument(
-              "The number of attribute num is not equal to the length of the "
-              "%d axis of Input(X). Expect %d but got %d.",
-              axis, x_dim[axis], num));
-    }
-    auto vec = phi::vectorize<int>(x_dim);
-    vec.erase(vec.begin() + axis);
-    ctx->SetOutputsDim("Y", std::vector<framework::DDim>(  // NOLINT
-                                x_dim[axis], phi::make_ddim(vec)));
-  }
 };

 class UnStackOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -141,20 +106,12 @@ class UnStackGradOp : public framework::OperatorWithKernel {
 namespace plat = paddle::platform;
 namespace ops = paddle::operators;

+DECLARE_INFER_SHAPE_FUNCTOR(unstack, UnStackInferMetaFunctor,
+                            PD_INFER_META(phi::UnStackInferMeta));
+
 REGISTER_OPERATOR(unstack, ops::UnStackOp, ops::UnStackOpMaker,
                   ops::UnStackGradOpMaker<paddle::framework::OpDesc>,
-                  ops::UnStackGradOpMaker<paddle::imperative::OpBase>);
+                  ops::UnStackGradOpMaker<paddle::imperative::OpBase>,
+                  UnStackInferMetaFunctor);

 REGISTER_OPERATOR(unstack_grad, ops::UnStackGradOp);
-
-REGISTER_OP_CPU_KERNEL(unstack,
-                       ops::UnStackKernel<plat::CPUDeviceContext, float>,
-                       ops::UnStackKernel<plat::CPUDeviceContext, double>,
-                       ops::UnStackKernel<plat::CPUDeviceContext, int>,
-                       ops::UnStackKernel<plat::CPUDeviceContext, int64_t>);
-
-REGISTER_OP_CPU_KERNEL(unstack_grad,
-                       ops::UnStackGradKernel<plat::CPUDeviceContext, float>,
-                       ops::UnStackGradKernel<plat::CPUDeviceContext, double>,
-                       ops::UnStackGradKernel<plat::CPUDeviceContext, int>,
-                       ops::UnStackGradKernel<plat::CPUDeviceContext, int64_t>);
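The unstack shape rule that moved into phi::UnStackInferMeta: erase the unstacked axis, and each of the x_dim[axis] outputs receives the remaining shape. A standalone check:

#include <iostream>
#include <vector>

int main() {
  std::vector<int> x_dim = {2, 4, 3};
  int axis = 1;
  int num = x_dim[axis];                  // attribute num must equal 4 here
  std::vector<int> out_dim = x_dim;
  out_dim.erase(out_dim.begin() + axis);  // each output is 2x3
  std::cout << num << " outputs of shape ";
  for (int d : out_dim) std::cout << d << " ";
  std::cout << "\n";  // 4 outputs of shape 2 3
}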
paddle/fluid/operators/unstack_op.h (deleted, 100644 → 0)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
   (Apache-2.0 license header as above.) */

#pragma once

#include <memory>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/for_range.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/device_vector.h>
#endif

namespace paddle {
namespace operators {

template <typename VecXType, typename T>
struct StackFunctor {
  HOSTDEVICE StackFunctor(const VecXType &x, T *y, int n, int post)
      : x_(x), y_(y), n_(n), post_(post) {}

  HOSTDEVICE void operator()(int idx) {
    int i = idx / (n_ * post_);
    int which_x = idx / post_ - i * n_;
    int x_index = i * post_ + idx % post_;
    y_[idx] = x_[which_x][x_index];
  }

 private:
  VecXType x_;
  T *y_;
  int n_;
  int post_;
};

template <typename VecDxType, typename T>
struct StackGradFunctor {
  HOSTDEVICE StackGradFunctor(const VecDxType &dx, const T *dy, int n, int post)
      : dx_(dx), dy_(dy), n_(n), post_(post) {}

  HOSTDEVICE void operator()(int idx) {
    int i = idx / (n_ * post_);
    int which_x = idx / post_ - i * n_;
    int x_index = i * post_ + idx % post_;
    dx_[which_x][x_index] = dy_[idx];
  }

 private:
  VecDxType dx_;
  const T *dy_;
  int n_;
  int post_;
};

template <typename DeviceContext, typename VecXType, typename T>
static inline void StackFunctorForRange(const DeviceContext &ctx,
                                        const VecXType &x, T *y, int total_num,
                                        int n, int post) {
  platform::ForRange<DeviceContext> for_range(ctx, total_num);
  for_range(StackFunctor<VecXType, T>(x, y, n, post));
}

template <typename DeviceContext, typename VecDxType, typename T>
static inline void StackGradFunctorForRange(const DeviceContext &ctx,
                                            const VecDxType &dx, const T *dy,
                                            int total_num, int n, int post) {
  platform::ForRange<DeviceContext> for_range(ctx, total_num);
  for_range(StackGradFunctor<VecDxType, T>(dx, dy, n, post));
}

template <typename DeviceContext, typename T>
class UnStackGradKernel : public framework::OpKernel<T> {
  using Tensor = framework::LoDTensor;

 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto x = ctx.MultiInput<Tensor>(framework::GradVarName("Y"));
    auto *y = ctx.Output<Tensor>(framework::GradVarName("X"));
    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += (x[0]->dims().size() + 1);

    int n = static_cast<int>(x.size());
    auto *y_data = y->mutable_data<T>(ctx.GetPlace());
    std::vector<const T *> x_datas(n);
    for (int i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();

    int pre = 1;
    int post = 1;
    auto &dim = x[0]->dims();
    for (auto i = 0; i < axis; ++i) pre *= dim[i];
    for (auto i = axis; i < dim.size(); ++i) post *= dim[i];

#if defined(__NVCC__) || defined(__HIPCC__)
    int total_num = pre * n * post;
    auto &dev_ctx = ctx.template device_context<DeviceContext>();

    thrust::device_vector<const T *> device_x_vec(x_datas);
    auto x_data_arr = device_x_vec.data().get();

    StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post);

    // Wait() must be called because device_x_vec may be destructed before
    // kernel ends
    dev_ctx.Wait();
#else
    auto x_data_arr = x_datas.data();

    size_t x_offset = 0;
    size_t y_offset = 0;
    for (int i = 0; i < pre; i++) {
      for (int j = 0; j < n; j++) {
        std::memcpy(y_data + y_offset, x_data_arr[j] + x_offset,
                    post * sizeof(T));
        y_offset += post;
      }
      x_offset += post;
    }
#endif
  }
};

template <typename DeviceContext, typename T>
class UnStackKernel : public framework::OpKernel<T> {
  using Tensor = framework::LoDTensor;

 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *dy = ctx.Input<Tensor>("X");
    auto dx = ctx.MultiOutput<Tensor>("Y");
    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += dy->dims().size();

    int n = dy->dims()[axis];
    std::vector<T *> dx_datas(n);  // NOLINT

    for (int i = 0; i < n; i++) {
      dx_datas[i] = dx[i]->mutable_data<T>(ctx.GetPlace());
    }
    auto dy_data = dy->data<T>();
    if (dy->numel() == 0) return;
    int pre = 1;
    for (int i = 0; i < axis; ++i) pre *= dy->dims()[i];
    int total_num = dy->numel();
    int post = total_num / (n * pre);

    auto &dev_ctx = ctx.template device_context<DeviceContext>();
#if defined(__NVCC__) || defined(__HIPCC__)
    thrust::device_vector<T *> device_dx_vec(dx_datas);
    auto dx_data_arr = device_dx_vec.data().get();
#else
    auto dx_data_arr = dx_datas.data();
#endif
    StackGradFunctorForRange(dev_ctx, dx_data_arr, dy_data, total_num, n, post);
#if defined(__NVCC__) || defined(__HIPCC__)
    // Wait() must be called because device_dx_vec may be destructed before
    // kernel ends
    dev_ctx.Wait();
#endif
  }
};

}  // namespace operators
}  // namespace paddle
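The deleted functors treat the stacked tensor as a (pre, n, post) box flattened to one linear index; decomposing idx recovers which input an element belongs to and its offset there. A worked example of that decomposition (values chosen by hand):

#include <cassert>

int main() {
  const int n = 2, post = 3;  // two inputs, 3 contiguous elements each
  const int idx = 7;          // linear index into the stacked output
  int i = idx / (n * post);             // pre-axis block:  7 / 6     = 1
  int which_x = idx / post - i * n;     // input tensor:    7/3 - 1*2 = 0
  int x_index = i * post + idx % post;  // offset in input: 1*3 + 1   = 4
  assert(i == 1 && which_x == 0 && x_index == 4);
  return 0;
}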
paddle/fluid/operators/unstack_op_npu.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/unstack_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"

 namespace paddle {
paddle/phi/core/utils/data_type.h
@@ -44,6 +44,10 @@ namespace phi {
   _PhiForEachDataTypeHelper_(                                          \
       callback, ::phi::dtype::complex<double>, DataType::COMPLEX128);

+#define _PhiForEachDataTypeTiny_(callback)                    \
+  _PhiForEachDataTypeHelper_(callback, int, DataType::INT32); \
+  _PhiForEachDataTypeHelper_(callback, int64_t, DataType::INT64);
+
 template <typename Visitor>
 inline void VisitDataType(phi::DataType type, Visitor visitor) {
 #define PhiVisitDataTypeCallback(cpp_type, data_type) \
@@ -59,4 +63,21 @@ inline void VisitDataType(phi::DataType type, Visitor visitor) {
   PADDLE_THROW(phi::errors::Unimplemented(
       "Not supported phi::DataType(%d) as data type.", static_cast<int>(type)));
 }
+
+template <typename Visitor>
+inline void VisitDataTypeTiny(phi::DataType type, Visitor visitor) {
+#define PhiVisitDataTypeCallbackTiny(cpp_type, data_type) \
+  do {                                                    \
+    if (type == data_type) {                              \
+      visitor.template apply<cpp_type>();                 \
+      return;                                             \
+    }                                                     \
+  } while (0)
+
+  _PhiForEachDataTypeTiny_(PhiVisitDataTypeCallbackTiny);
+#undef PhiVisitDataTypeCallbackTiny
+  PADDLE_THROW(phi::errors::Unimplemented(
+      "Not supported phi::DataType(%d) as data type.", static_cast<int>(type)));
+}
 }  // namespace phi
浏览文件 @
74894cd7
...
...
@@ -1167,6 +1167,52 @@ void RnnInferMeta(const MetaTensor& x,
}
}
void
StackInferMeta
(
const
std
::
vector
<
MetaTensor
*>&
x
,
int
axis
,
MetaTensor
*
out
)
{
PADDLE_ENFORCE_GT
(
x
.
size
(),
0UL
,
phi
::
errors
::
InvalidArgument
(
"Number of Inputs(x) must be larger than 0, but"
" received value is:%d."
,
x
.
size
()));
const
auto
&
input_dims
=
GetMetaTensorsDim
(
x
);
for
(
size_t
i
=
1
;
i
<
input_dims
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
input_dims
[
i
],
input_dims
[
0
],
phi
::
errors
::
InvalidArgument
(
"Dims of all Inputs(X) must be the same, but"
" received input %d dim is:%d not equal to input 0"
" dim:%d."
,
i
,
input_dims
[
i
],
input_dims
[
0
]));
}
int
rank
=
input_dims
[
0
].
size
();
PADDLE_ENFORCE_GE
(
axis
,
-
(
rank
+
1
),
phi
::
errors
::
InvalidArgument
(
"Attr(axis) must be inside [-(rank+1), rank+1), where rank = %d, "
"but received axis is:%d."
,
rank
,
axis
));
PADDLE_ENFORCE_LT
(
axis
,
rank
+
1
,
phi
::
errors
::
InvalidArgument
(
"Attr(axis) must be inside [-(rank+1), rank+1), where rank = %d, "
"but received axis is:%d"
,
rank
,
axis
));
if
(
axis
<
0
)
axis
+=
(
rank
+
1
);
auto
vec
=
phi
::
vectorize
<
int
>
(
input_dims
[
0
]);
vec
.
insert
(
vec
.
begin
()
+
axis
,
input_dims
.
size
());
out
->
set_dims
(
phi
::
make_ddim
(
vec
));
out
->
set_dtype
(
x
.
at
(
0
)
->
dtype
());
out
->
share_lod
(
*
x
.
at
(
0
));
}
void
WarpctcInferMeta
(
const
MetaTensor
&
logits
,
const
MetaTensor
&
label
,
const
paddle
::
optional
<
const
MetaTensor
&>
logits_length
,
...
...
paddle/phi/infermeta/multiary.h
@@ -231,6 +231,10 @@ void RnnInferMeta(const MetaTensor& x,
                   std::vector<MetaTensor*> state,
                   MetaTensor* reserve);

+void StackInferMeta(const std::vector<MetaTensor*>& x,
+                    int axis,
+                    MetaTensor* out);
+
 void WarpctcInferMeta(const MetaTensor& logits,
                       const MetaTensor& label,
                       const paddle::optional<const MetaTensor&> logits_length,
paddle/phi/infermeta/ternary.cc
@@ -345,6 +345,56 @@ void PutAlongAxisInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }

+void RangeInferMeta(const MetaTensor& start,
+                    const MetaTensor& end,
+                    const MetaTensor& step,
+                    MetaTensor* out) {
+  auto start_dims = start.dims();
+  auto end_dims = end.dims();
+  auto step_dims = step.dims();
+  PADDLE_ENFORCE_EQ(
+      start_dims.size(), 1,
+      phi::errors::InvalidArgument(
+          "The dim of the shape of Input(Start) should be 1, but got %d",
+          start_dims.size()));
+  PADDLE_ENFORCE_EQ(start_dims[0], 1,
+                    phi::errors::InvalidArgument(
+                        "The first dim of the shape of Input(Start) should "
+                        "be 1, but got %d",
+                        start_dims[0]));
+  PADDLE_ENFORCE_EQ(
+      end_dims.size(), 1,
+      phi::errors::InvalidArgument(
+          "The dim of the shape of Input(End) should be 1, but got %d",
+          end_dims.size()));
+  PADDLE_ENFORCE_EQ(end_dims[0], 1,
+                    phi::errors::InvalidArgument(
+                        "The first dim of the shape of "
+                        "Input(End) should be 1, but got %d",
+                        end_dims[0]));
+  PADDLE_ENFORCE_EQ(
+      step_dims.size(), 1,
+      phi::errors::InvalidArgument(
+          "The dim of the shape of Input(Step) should be 1, but got %d",
+          step_dims.size()));
+  PADDLE_ENFORCE_EQ(step_dims[0], 1,
+                    phi::errors::InvalidArgument(
+                        "The first dim of the shape of Input(Step) should "
+                        "be 1, but got %d",
+                        step_dims[0]));
+  out->set_dims({-1});
+  out->set_dtype(start.dtype());
+}
+
 void RoiAlignInferMeta(const MetaTensor& x,
                        const MetaTensor& boxes,
                        paddle::optional<const MetaTensor&> boxes_num,
paddle/phi/infermeta/ternary.h
@@ -81,6 +81,11 @@ void PutAlongAxisInferMeta(const MetaTensor& x,
                            const std::string& reduce,
                            MetaTensor* out);

+void RangeInferMeta(const MetaTensor& start,
+                    const MetaTensor& end,
+                    const MetaTensor& step,
+                    MetaTensor* out);
+
 void RoiAlignInferMeta(const MetaTensor& x,
                        const MetaTensor& boxes,
                        paddle::optional<const MetaTensor&> boxes_num,
paddle/phi/infermeta/unary.cc
@@ -2552,6 +2552,85 @@ void UnfoldInferMeta(const MetaTensor& x,
   out->set_dims(phi::make_ddim(out_dims));
 }

+void UniqueInferMeta(const MetaTensor& x,
+                     bool return_index,
+                     bool return_inverse,
+                     bool return_counts,
+                     const std::vector<int>& axis,
+                     DataType dtype,
+                     MetaTensor* out,
+                     MetaTensor* indices,
+                     MetaTensor* index,
+                     MetaTensor* counts) {
+  bool is_sorted = true;
+  UniqueRawInferMeta(x, return_index, return_inverse, return_counts, axis,
+                     dtype, is_sorted, out, indices, index, counts);
+}
+
+void UniqueRawInferMeta(const MetaTensor& x,
+                        bool return_index,
+                        bool return_inverse,
+                        bool return_counts,
+                        const std::vector<int>& axis,
+                        DataType dtype,
+                        bool is_sorted,
+                        MetaTensor* out,
+                        MetaTensor* indices,
+                        MetaTensor* index,
+                        MetaTensor* counts) {
+  if (!is_sorted) {
+    PADDLE_ENFORCE_EQ(x.dims().size(), 1,
+                      phi::errors::InvalidArgument(
+                          "The Input(X) should be 1-D Tensor, "
+                          "But now the dims of Input(X) is %d.",
+                          x.dims().size()));
+    out->set_dims(phi::make_ddim({-1}));
+    index->set_dims(x.dims());
+    return;
+  }
+
+  if (axis.empty()) {
+    out->set_dims(phi::make_ddim({-1}));
+    if (return_inverse) {
+      index->set_dims(phi::make_ddim({phi::product(x.dims())}));
+    }
+  } else {
+    int axis_value = axis[0];
+    if (axis_value < 0) {
+      axis_value += x.dims().size();
+    }
+    PADDLE_ENFORCE_LT(
+        axis_value, x.dims().size(),
+        phi::errors::InvalidArgument("The axis(%d) should be less than "
+                                     "the dimension size(%d) of x.",
+                                     axis_value, x.dims().size()));
+    auto out_dims = x.dims();
+    out_dims[axis_value] = -1;
+    out->set_dims(out_dims);
+    if (return_inverse) {
+      index->set_dims(phi::make_ddim({x.dims()[axis_value]}));
+    }
+  }
+  if (return_index) {
+    indices->set_dims(phi::make_ddim({-1}));
+  }
+  if (return_counts) {
+    counts->set_dims(phi::make_ddim({-1}));
+  }
+}
+
 void UnsqueezeInferMeta(const MetaTensor& x,
                         const IntArray& axes,
                         MetaTensor* xshape,
@@ -2595,6 +2674,53 @@ void UnsqueezeInferMeta(const MetaTensor& x,
   xshape->set_dtype(x.dtype());
 }

+void UnStackInferMeta(const MetaTensor& x,
+                      int axis,
+                      int num,
+                      std::vector<MetaTensor*> outs) {
+  auto x_dim = x.dims();
+  int rank = x_dim.size();
+  PADDLE_ENFORCE_GE(axis, -rank,
+                    phi::errors::InvalidArgument(
+                        "The attribute axis is out of range, it must be "
+                        "inside [-rank, rank), where rank = %d",
+                        rank));
+  PADDLE_ENFORCE_LT(axis, rank,
+                    phi::errors::InvalidArgument(
+                        "The attribute axis is out of range, it must be "
+                        "inside [-rank, rank), where rank = %d",
+                        rank));
+  if (axis < 0) axis += rank;
+
+  size_t output_count = outs.size();
+  PADDLE_ENFORCE_EQ(output_count, static_cast<size_t>(num),
+                    phi::errors::InvalidArgument(
+                        "Number of Outputs(Y) is wrong. Got %d , but it must "
+                        "equal to attribute num which is %d.",
+                        output_count, static_cast<size_t>(num)));
+  if (x_dim[axis] > 0) {
+    PADDLE_ENFORCE_EQ(
+        num, x_dim[axis],
+        phi::errors::InvalidArgument(
+            "The number of attribute num is not equal to the length of the "
+            "%d axis of Input(X). Expect %d but got %d.",
+            axis, x_dim[axis], num));
+  }
+  auto vec = phi::vectorize<int>(x_dim);
+  vec.erase(vec.begin() + axis);
+  for (size_t i = 0; i < output_count; i++) {
+    outs[i]->set_dims(phi::make_ddim(vec));
+    outs[i]->set_dtype(x.dtype());
+  }
+}
+
 void OneHotRawInferMeta(const MetaTensor& x,
                         int32_t depth,
                         DataType dtype,
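The dims logic in UniqueRawInferMeta is worth spelling out: with no axis, the input is flattened and out becomes {-1}; with an axis, only that dimension becomes -1, since the number of unique slices is a runtime value. A standalone rendering of the axis case:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> x_dims = {3, 5, 4};
  int axis = 1;
  // Only the unique'd axis is unknown until the kernel runs.
  std::vector<int64_t> out_dims = x_dims;
  out_dims[axis] = -1;
  for (auto d : out_dims) std::cout << d << " ";  // 3 -1 4
  std::cout << "\n";
}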
paddle/phi/infermeta/unary.h
@@ -360,12 +360,40 @@ void UnfoldInferMeta(const MetaTensor& x,
                      MetaTensor* out,
                      MetaConfig config = MetaConfig());

+void UniqueInferMeta(const MetaTensor& x,
+                     bool return_index,
+                     bool return_inverse,
+                     bool return_counts,
+                     const std::vector<int>& axis,
+                     DataType dtype,
+                     MetaTensor* out,
+                     MetaTensor* indices,
+                     MetaTensor* index,
+                     MetaTensor* counts);
+
+void UniqueRawInferMeta(const MetaTensor& x,
+                        bool return_index,
+                        bool return_inverse,
+                        bool return_counts,
+                        const std::vector<int>& axis,
+                        DataType dtype,
+                        bool is_sorted,
+                        MetaTensor* out,
+                        MetaTensor* indices,
+                        MetaTensor* index,
+                        MetaTensor* counts);
+
 void UnsqueezeInferMeta(const MetaTensor& x,
                         const IntArray& axes,
                         MetaTensor* xshape,
                         MetaTensor* out,
                         MetaConfig config = MetaConfig());

+void UnStackInferMeta(const MetaTensor& x,
+                      int axis,
+                      int num,
+                      std::vector<MetaTensor*> outs);
+
 void OneHotRawInferMeta(const MetaTensor& x,
                         int32_t depth,
                         DataType dtype,
paddle/phi/kernels/cpu/range_kernel.cc (new file, 100644)

/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
   (Apache-2.0 license header as above.) */

#include "paddle/phi/kernels/range_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/range_function.h"

namespace phi {

template <typename T, typename Context>
void RangeKernel(const Context& dev_ctx,
                 const DenseTensor& start,
                 const DenseTensor& end,
                 const DenseTensor& step,
                 DenseTensor* out) {
  T start_value = start.data<T>()[0];
  T end_value = end.data<T>()[0];
  T step_value = step.data<T>()[0];
  int64_t size = 0;
  phi::funcs::GetSize(start_value, end_value, step_value, &size);
  out->Resize(phi::make_ddim({size}));
  T* out_data = dev_ctx.template Alloc<T>(out);
  T value = start_value;
  for (int64_t i = 0; i < size; ++i) {
    out_data[i] = value;
    value += step_value;
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(
    range, CPU, ALL_LAYOUT, phi::RangeKernel, float, double, int, int64_t) {}
paddle/phi/kernels/cpu/stack_grad_kernel.cc (new file, 100644)

/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
   (Apache-2.0 license header as above.) */

#include "paddle/phi/kernels/stack_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/stack_functor.h"

namespace phi {

template <typename T, typename Context>
void StackGradKernel(const Context& dev_ctx,
                     const DenseTensor& out,
                     int axis,
                     std::vector<DenseTensor*> x_grad) {
  if (axis < 0) axis += out.dims().size();
  int n = out.dims()[axis];
  std::vector<T*> dx_datas(n);  // NOLINT

  for (int i = 0; i < n; i++) {
    if (x_grad[i] == nullptr) {
      dx_datas[i] = nullptr;
    } else {
      dx_datas[i] = dev_ctx.template Alloc<T>(x_grad[i]);
    }
  }
  auto dy_data = out.data<T>();
  int pre = 1;
  for (int i = 0; i < axis; ++i) pre *= out.dims()[i];
  int total_num = out.numel();
  int post = total_num / (n * pre);
  auto dx_data_arr = dx_datas.data();
  phi::funcs::StackGradFunctorForRange(
      dev_ctx, dx_data_arr, dy_data, total_num, n, post);
}

}  // namespace phi

PD_REGISTER_KERNEL(stack_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::StackGradKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   phi::dtype::bfloat16) {}
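The pre/n/post factorization used by both stack kernels above generalizes to any axis: pre is the product of dims before the axis, n the stacked dimension, and post everything after. A worked example:

#include <iostream>

int main() {
  int dims[] = {2, 4, 3};
  int axis = 1;
  int n = dims[axis];
  int pre = 1, total = 1;
  for (int i = 0; i < axis; ++i) pre *= dims[i];
  for (int d : dims) total *= d;
  int post = total / (n * pre);
  std::cout << "pre=" << pre << " n=" << n << " post=" << post << "\n";
  // pre=2 n=4 post=3
}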
paddle/phi/kernels/cpu/stack_kernel.cc (new file, 100644)

/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
   (Apache-2.0 license header as above.) */

#include "paddle/phi/kernels/stack_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void StackKernel(const Context& dev_ctx,
                 const std::vector<const DenseTensor*>& x,
                 int axis,
                 DenseTensor* out) {
  if (axis < 0) axis += (x[0]->dims().size() + 1);
  int n = static_cast<int>(x.size());
  T* y_data = dev_ctx.template Alloc<T>(out);
  std::vector<const T*> x_datas(n);
  for (int i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();

  int pre = 1, post = 1;
  auto& dim = x[0]->dims();
  for (auto i = 0; i < axis; ++i) pre *= dim[i];
  for (auto i = axis; i < dim.size(); ++i) post *= dim[i];

  auto x_data_arr = x_datas.data();

  size_t x_offset = 0;
  size_t y_offset = 0;
  for (int i = 0; i < pre; i++) {
    for (int j = 0; j < n; j++) {
      std::memcpy(
          y_data + y_offset, x_data_arr[j] + x_offset, post * sizeof(T));
      y_offset += post;
    }
    x_offset += post;
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(stack,
                   CPU,
                   ALL_LAYOUT,
                   phi::StackKernel,
                   float,
                   double,
                   int,
                   int64_t,
                   phi::dtype::bfloat16) {}
paddle/phi/kernels/cpu/unique_kernel.cc (new file, 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// (Apache-2.0 license header as above.)

#include "paddle/phi/kernels/unique_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/unique_functor.h"

namespace phi {

template <typename T, typename Context>
void UniqueKernel(const Context& context,
                  const DenseTensor& x,
                  bool return_index,
                  bool return_inverse,
                  bool return_counts,
                  const std::vector<int>& axis,
                  DataType dtype,
                  DenseTensor* out,
                  DenseTensor* indices,
                  DenseTensor* index,
                  DenseTensor* counts) {
  bool is_sorted = true;
  UniqueRawKernel<T, Context>(context, x, return_index, return_inverse,
                              return_counts, axis, dtype, is_sorted, out,
                              indices, index, counts);
}

template <typename T, typename Context>
void UniqueRawKernel(const Context& context,
                     const DenseTensor& x,
                     bool return_index,
                     bool return_inverse,
                     bool return_counts,
                     const std::vector<int>& axis,
                     DataType dtype,
                     bool is_sorted,
                     DenseTensor* out,
                     DenseTensor* indices,
                     DenseTensor* index,
                     DenseTensor* counts) {
  if (dtype == phi::DataType::INT32) {
    PADDLE_ENFORCE_LE(
        x.numel(), INT_MAX,
        phi::errors::InvalidArgument(
            "The number of elements in Input(X) should be less than or "
            "equal to INT_MAX, but received num is %d. Please set `dtype` to "
            "int64.",
            x.numel()));
  }
  if (!is_sorted) {
    phi::VisitDataType(
        dtype,
        phi::funcs::UniqueOpFunctor<Context, T>(context, out, index, &x));
    return;
  }

  if (x.numel() == 0) {
    context.template Alloc<T>(out);
    return;
  }
  if (axis.empty()) {
    phi::VisitDataTypeTiny(
        dtype,
        phi::funcs::UniqueFlattendTensorFunctor<Context, T>(
            context, x, out, indices, index, counts, return_index,
            return_inverse, return_counts));
  } else {
    int axis_value = axis[0];
    phi::VisitDataTypeTiny(
        dtype,
        phi::funcs::UniqueDimFunctor<Context, T>(
            context, x, out, indices, index, counts, axis_value, return_index,
            return_inverse, return_counts));
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(unique,
                   CPU,
                   ALL_LAYOUT,
                   phi::UniqueKernel,
                   float,
                   double,
                   int32_t,
                   int64_t) {}

PD_REGISTER_KERNEL(unique_raw,
                   CPU,
                   ALL_LAYOUT,
                   phi::UniqueRawKernel,
                   float,
                   double,
                   int32_t,
                   int64_t) {}
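For the sorted, flattened path, UniqueRawKernel defers to UniqueFlattendTensorFunctor. A plain C++ sketch of what that path computes — sorted unique values plus the inverse index mapping each input element to its unique value (the real functor also produces indices and counts):

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> x = {2, 1, 2, 3, 1};
  std::vector<int> out = x;
  std::sort(out.begin(), out.end());
  out.erase(std::unique(out.begin(), out.end()), out.end());  // {1, 2, 3}

  std::vector<int> inverse;  // x[i] == out[inverse[i]]
  for (int v : x)
    inverse.push_back(std::lower_bound(out.begin(), out.end(), v) -
                      out.begin());
  for (int idx : inverse) std::cout << idx << " ";  // 1 0 1 2 0
  std::cout << "\n";
}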
paddle/fluid/operators/unstack_op.cu → paddle/phi/kernels/cpu/unstack_grad_kernel.cc

-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,21 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/unstack_op.h"
-
-namespace plat = paddle::platform;
-namespace ops = paddle::operators;
-
-REGISTER_OP_CUDA_KERNEL(
-    unstack, ops::UnStackKernel<plat::CUDADeviceContext, float>,
-    ops::UnStackKernel<plat::CUDADeviceContext, double>,
-    ops::UnStackKernel<plat::CUDADeviceContext, int>,
-    ops::UnStackKernel<plat::CUDADeviceContext, int64_t>,
-    ops::UnStackKernel<plat::CUDADeviceContext, plat::float16>);
-
-REGISTER_OP_CUDA_KERNEL(
-    unstack_grad, ops::UnStackGradKernel<plat::CUDADeviceContext, float>,
-    ops::UnStackGradKernel<plat::CUDADeviceContext, double>,
-    ops::UnStackGradKernel<plat::CUDADeviceContext, int>,
-    ops::UnStackGradKernel<plat::CUDADeviceContext, int64_t>,
-    ops::UnStackGradKernel<plat::CUDADeviceContext, plat::float16>);
+#include "paddle/phi/kernels/unstack_grad_kernel.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/unstack_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(unstack_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::UnStackGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
paddle/phi/kernels/cpu/unstack_kernel.cc
0 → 100644
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/unstack_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/unstack_kernel_impl.h"
PD_REGISTER_KERNEL(
    unstack, CPU, ALL_LAYOUT, phi::UnStackKernel, float, double, int, int64_t) {}
paddle/phi/kernels/funcs/range_function.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/enforce.h"
namespace phi {
namespace funcs {

template <typename T>
void GetSize(T start, T end, T step, int64_t* size) {
  PADDLE_ENFORCE_NE(
      step,
      0,
      phi::errors::InvalidArgument("The step of range op should not be 0."));

  if (start < end) {
    PADDLE_ENFORCE_GT(
        step,
        0,
        phi::errors::InvalidArgument(
            "The step should be greater than 0 while start < end."));
  }

  if (start > end) {
    PADDLE_ENFORCE_LT(step,
                      0,
                      phi::errors::InvalidArgument(
                          "The step should be less than 0 while start > end."));
  }

  *size = std::is_integral<T>::value
              ? ((std::abs(end - start) + std::abs(step) - 1) / std::abs(step))
              : std::ceil(std::abs((end - start) / step));
}

}  // namespace funcs
}  // namespace phi
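As a quick sanity check of the size formula in GetSize, here is a standalone restatement with concrete numbers (a sketch, not part of the header):

#include <cassert>
#include <cmath>
#include <cstdint>

// Floating-point branch of GetSize, restated.
int64_t RangeSizeF(double start, double end, double step) {
  return static_cast<int64_t>(std::ceil(std::abs((end - start) / step)));
}

int main() {
  // Integral branch: (|end - start| + |step| - 1) / |step|
  // start = 0, end = 10, step = 3 -> (10 + 3 - 1) / 3 = 4, i.e. {0, 3, 6, 9}.
  assert(((10 - 0) + 3 - 1) / 3 == 4);
  // Floating branch: ceil(|2.5 - 0| / 0.7) = 4, i.e. {0, 0.7, 1.4, 2.1}.
  assert(RangeSizeF(0.0, 2.5, 0.7) == 4);
  return 0;
}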
paddle/fluid/operators/stack_op.h → paddle/phi/kernels/funcs/stack_functor.h
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
@@ -14,12 +14,29 @@
#pragma once
#include <memory>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
-namespace paddle {
-namespace operators {
+namespace phi {
+namespace funcs {
template <typename VecXType, typename T>
struct StackFunctor {
  HOSTDEVICE StackFunctor(const VecXType& x, T* y, int n, int post)
      : x_(x), y_(y), n_(n), post_(post) {}

  HOSTDEVICE void operator()(int idx) {
    int i = idx / (n_ * post_);
    int which_x = idx / post_ - i * n_;
    int x_index = i * post_ + idx % post_;
    y_[idx] = x_[which_x][x_index];
  }

 private:
  VecXType x_;
  T* y_;
  int n_;
  int post_;
};
template <typename VecDxType, typename T>
struct StackGradFunctor {
...
...
@@ -40,81 +57,27 @@ struct StackGradFunctor {
  int post_;
};
template <typename DeviceContext, typename VecXType, typename T>
static inline void StackFunctorForRange(const DeviceContext& ctx,
                                        const VecXType& x,
                                        T* y,
                                        int total_num,
                                        int n,
                                        int post) {
  phi::funcs::ForRange<DeviceContext> for_range(ctx, total_num);
  for_range(StackFunctor<VecXType, T>(x, y, n, post));
}
template <typename DeviceContext, typename VecDxType, typename T>
static inline void StackGradFunctorForRange(const DeviceContext& ctx,
                                            const VecDxType& dx,
                                            const T* dy,
                                            int total_num,
                                            int n,
                                            int post) {
-  platform::ForRange<DeviceContext> for_range(ctx, total_num);
+  phi::funcs::ForRange<DeviceContext> for_range(ctx, total_num);
  for_range(StackGradFunctor<VecDxType, T>(dx, dy, n, post));
}
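To make the StackFunctor index arithmetic concrete: for n inputs of shape (pre, post) stacked into y of shape (pre, n, post), each flat output index decomposes into the outer slice i, the source tensor which_x, and the offset inside it. A host-side restatement (a sketch, not phi code):

#include <cassert>
#include <vector>

int main() {
  const int pre = 2, n = 3, post = 4;
  std::vector<std::vector<int>> x(n, std::vector<int>(pre * post));
  for (int k = 0; k < n; ++k)
    for (int e = 0; e < pre * post; ++e) x[k][e] = 100 * k + e;

  std::vector<int> y(pre * n * post);
  for (int idx = 0; idx < pre * n * post; ++idx) {
    int i = idx / (n * post);             // position along the "pre" dims
    int which_x = idx / post - i * n;     // which input tensor
    int x_index = i * post + idx % post;  // offset inside that input
    y[idx] = x[which_x][x_index];
  }
  // y[(i, k, j)] == x[k][(i, j)], checked for i = 1, k = 2, j = 3:
  assert(y[(1 * n + 2) * post + 3] == x[2][1 * post + 3]);
  return 0;
}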
-template <typename DeviceContext, typename T>
-class StackKernel : public framework::OpKernel<T> {
-  using Tensor = framework::LoDTensor;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto x = ctx.MultiInput<Tensor>("X");
-    auto* y = ctx.Output<Tensor>("Y");
-    int axis = ctx.Attr<int>("axis");
-    if (axis < 0) axis += (x[0]->dims().size() + 1);
-    int n = static_cast<int>(x.size());
-    auto* y_data = y->mutable_data<T>(ctx.GetPlace());
-    std::vector<const T*> x_datas(n);
-    for (int i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();
-
-    int pre = 1, post = 1;
-    auto& dim = x[0]->dims();
-    for (auto i = 0; i < axis; ++i) pre *= dim[i];
-    for (auto i = axis; i < dim.size(); ++i) post *= dim[i];
-
-    auto x_data_arr = x_datas.data();
-    size_t x_offset = 0;
-    size_t y_offset = 0;
-    for (int i = 0; i < pre; i++) {
-      for (int j = 0; j < n; j++) {
-        std::memcpy(
-            y_data + y_offset, x_data_arr[j] + x_offset, post * sizeof(T));
-        y_offset += post;
-      }
-      x_offset += post;
-    }
-  }
-};
-template <typename DeviceContext, typename T>
-class StackGradKernel : public framework::OpKernel<T> {
-  using Tensor = framework::LoDTensor;
-
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    auto dx = ctx.MultiOutput<Tensor>(framework::GradVarName("X"));
-    int axis = ctx.Attr<int>("axis");
-    if (axis < 0) axis += dy->dims().size();
-    int n = dy->dims()[axis];
-    std::vector<T*> dx_datas(n);  // NOLINT
-
-    for (int i = 0; i < n; i++) {
-      if (dx[i] == nullptr) {
-        dx_datas[i] = nullptr;
-      } else {
-        dx_datas[i] = dx[i]->mutable_data<T>(ctx.GetPlace());
-      }
-    }
-    auto dy_data = dy->data<T>();
-    int pre = 1;
-    for (int i = 0; i < axis; ++i) pre *= dy->dims()[i];
-    int total_num = dy->numel();
-    int post = total_num / (n * pre);
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    auto dx_data_arr = dx_datas.data();
-    StackGradFunctorForRange(dev_ctx, dx_data_arr, dy_data, total_num, n, post);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
paddle/phi/kernels/funcs/unique_functor.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
namespace funcs {

template <typename Context, typename InT>
struct UniqueOpFunctor {
  const Context& context_;
  DenseTensor* out_;
  DenseTensor* index_;
  const DenseTensor* in_;
  DenseTensor* count_;

  UniqueOpFunctor(const Context& context,
                  DenseTensor* out,
                  DenseTensor* index,
                  const DenseTensor* in,
                  DenseTensor* count = nullptr)
      : context_(context), out_(out), index_(index), in_(in), count_(count) {}

  template <typename IndexT>
  void apply() const {
    auto* in_data = in_->data<InT>();
    auto* index_data = context_.template Alloc<IndexT>(index_);

    int64_t j = 0;

    // TODO(fangzeyang): Should optimize performance here.
    std::unordered_map<InT, int64_t> dict;
    std::vector<InT> uniq;

    PADDLE_ENFORCE_LT(
        in_->numel(),
        pow(2, 31),
        phi::errors::InvalidArgument(
            "The num of Input(X) elements should be less then INT_MAX, "
            "but received num is %d.",
            in_->numel()));

    for (auto i = 0; i < in_->numel(); i++) {
      auto it = dict.find(in_data[i]);
      if (it == dict.end()) {
        dict.emplace(std::make_pair(in_data[i], j));
        uniq.emplace_back(in_data[i]);
        index_data[i] = static_cast<IndexT>(j);
        j++;
      } else {
        index_data[i] = static_cast<IndexT>(it->second);
      }
    }

    if (count_ != nullptr) {
      // Resize the count tensor dims to allocate the memory
      count_->Resize(phi::make_ddim({static_cast<int64_t>(uniq.size())}));
      IndexT* count_data = context_.template Alloc<IndexT>(count_);
      // init count_data to 0
      memset(count_data, 0, uniq.size() * sizeof(IndexT));

      const auto& index_type = index_->dtype();
      bool index_type_match =
          index_type == DataType::INT32 || index_type == DataType::INT64;
      PADDLE_ENFORCE_EQ(index_type_match,
                        true,
                        phi::errors::InvalidArgument(
                            "Index holds the wrong type, it holds %s, "
                            "but desires to be %s or %s",
                            paddle::framework::DataTypeToString(
                                paddle::framework::TransToProtoVarType(
                                    index_type)),
                            paddle::framework::DataTypeToString(
                                paddle::framework::TransToProtoVarType(
                                    DataType::INT32)),
                            paddle::framework::DataTypeToString(
                                paddle::framework::TransToProtoVarType(
                                    DataType::INT64))));

      if (index_type == DataType::INT32) {
        for (auto i = 0; i < in_->numel(); ++i) {
          const IndexT& index = index_data[i];
          count_data[static_cast<int32_t>(index)] += static_cast<IndexT>(1);
        }
      } else {
        for (auto i = 0; i < in_->numel(); ++i) {
          const IndexT& index = index_data[i];
          count_data[static_cast<int64_t>(index)] += static_cast<IndexT>(1);
        }
      }
    }

    out_->Resize(phi::make_ddim({static_cast<int64_t>(uniq.size())}));
    auto* out_data = context_.template Alloc<InT>(out_);
    std::memcpy(out_data, uniq.data(), uniq.size() * sizeof(InT));
  }
};
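The unsorted path above preserves first-occurrence order: a hash map assigns each new value the next output slot and records, per input element, the slot of its first occurrence. The same bookkeeping on plain vectors (standalone sketch):

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<int> in = {2, 3, 3, 1, 2};
  std::unordered_map<int, int64_t> dict;
  std::vector<int> uniq;
  std::vector<int64_t> index(in.size());
  int64_t j = 0;
  for (size_t i = 0; i < in.size(); ++i) {
    auto it = dict.find(in[i]);
    if (it == dict.end()) {
      dict.emplace(in[i], j);
      uniq.push_back(in[i]);
      index[i] = j++;
    } else {
      index[i] = it->second;
    }
  }
  assert((uniq == std::vector<int>{2, 3, 1}));             // appearance order
  assert((index == std::vector<int64_t>{0, 1, 1, 2, 0}));  // inverse mapping
  return 0;
}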
static std::vector<DenseTensor> Unbind(const DenseTensor& in) {
  int64_t size = in.dims()[0];
  std::vector<DenseTensor> tensors(size);
  for (int64_t i = 0; i < size; ++i) {
    tensors[i] = in.Slice(i, i + 1);
  }
  return tensors;
}
template <typename T>
static bool Equal(const DenseTensor& a, const DenseTensor& b) {
  if (a.numel() != b.numel()) {
    return false;
  }
  for (int64_t i = 0; i < a.numel(); ++i) {
    if (a.data<T>()[i] != b.data<T>()[i]) {
      return false;
    }
  }
  return true;
}
template <typename Context, typename InT, typename IndexT>
static void UniqueFlattendTensor(const Context& context,
                                 const DenseTensor& in,
                                 DenseTensor* out,
                                 DenseTensor* indices,
                                 DenseTensor* index,
                                 DenseTensor* count,
                                 bool return_index,
                                 bool return_inverse,
                                 bool return_counts) {
  const InT* in_data = in.data<InT>();
  std::set<InT> unique(in_data, in_data + in.numel());
  out->Resize(phi::make_ddim({static_cast<int64_t>(unique.size())}));
  auto* out_data = context.template Alloc<InT>(out);
  std::copy(unique.begin(), unique.end(), out_data);

  if (return_index) {
    indices->Resize(phi::make_ddim({out->numel()}));
    auto indices_data = context.template Alloc<IndexT>(indices);
    std::unordered_map<InT, IndexT> indices_map;
    indices_map.reserve(out->numel());
    for (int64_t i = 0; i < in.numel(); ++i) {
      if (indices_map.find(in_data[i]) != indices_map.end()) continue;
      indices_map[in_data[i]] = i;
    }
    for (int64_t i = 0; i < out->numel(); ++i) {
      indices_data[i] = indices_map[out_data[i]];
    }
  }

  if (return_inverse) {
    index->Resize(phi::make_ddim({in.numel()}));
    auto inverse_data = context.template Alloc<IndexT>(index);
    std::unordered_map<InT, IndexT> inverse_map;
    inverse_map.reserve(out->numel());
    for (int64_t i = 0; i < out->numel(); ++i) {
      inverse_map[out_data[i]] = i;
    }
    for (int64_t i = 0; i < in.numel(); ++i) {
      inverse_data[i] = inverse_map[in_data[i]];
    }
  }

  if (return_counts) {
    count->Resize(phi::make_ddim({out->numel()}));
    auto count_data = context.template Alloc<IndexT>(count);
    std::unordered_map<InT, IndexT> counts_map;
    counts_map.reserve(out->numel());
    for (int64_t i = 0; i < out->numel(); ++i) {
      counts_map[out_data[i]] = 0;
    }
    for (int64_t i = 0; i < in.numel(); i++) {
      counts_map[in_data[i]] += 1;
    }
    for (int64_t i = 0; i < out->numel(); i++) {
      count_data[i] = counts_map[out_data[i]];
    }
  }
}
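UniqueFlattendTensor gets sorted unique values for free from std::set, then derives indices, inverse, and counts with small hash maps keyed by value. The same derivation on plain vectors (standalone sketch):

#include <cassert>
#include <set>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<int> in = {3, 1, 3, 2, 1};
  std::set<int> uniq(in.begin(), in.end());  // sorted: {1, 2, 3}
  std::vector<int> out(uniq.begin(), uniq.end());

  std::unordered_map<int, int> pos;  // value -> position in out
  for (size_t i = 0; i < out.size(); ++i) pos[out[i]] = i;

  std::vector<int> inverse(in.size());
  std::unordered_map<int, int> counts;
  for (size_t i = 0; i < in.size(); ++i) {
    inverse[i] = pos[in[i]];
    counts[in[i]] += 1;
  }
  assert((inverse == std::vector<int>{2, 0, 2, 1, 0}));
  assert(counts[3] == 2 && counts[2] == 1 && counts[1] == 2);
  return 0;
}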
template <typename Context, typename ForwardIt, typename InT, typename IndexT>
static ForwardIt UniqueDimImpl(const Context& context,
                               ForwardIt first,
                               ForwardIt last,
                               const std::vector<IndexT>& sorted_indices_vec,
                               std::vector<IndexT>* inverse_vec,
                               std::vector<IndexT>* counts_vec,
                               std::vector<IndexT>* indices_vec) {
  if (first == last) {
    return last;
  }

  (*inverse_vec)[sorted_indices_vec[0]] = 0;
  (*counts_vec)[0] = 1;
  (*indices_vec)[0] = sorted_indices_vec[0];

  ForwardIt begin = first;
  ForwardIt result = first;

  while (++first != last) {
    int64_t idx_first = std::distance(begin, first);
    int64_t idx_result = std::distance(begin, result);
    if (!Equal<InT>(*result, *first)) {
      if (++result != first) {
        *result = std::move(*first);
      }
      idx_result += 1;
      (*indices_vec)[idx_result] = sorted_indices_vec[idx_first];
    }
    (*inverse_vec)[sorted_indices_vec[idx_first]] = idx_result;
    (*counts_vec)[idx_result] += 1;
  }
  return ++result;
}
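UniqueDimImpl is a std::unique-style sweep over already-sorted rows that additionally records, per kept run, its element count and the original index of its first member, plus an inverse entry for every input row. The same sweep on a sorted scalar sequence (standalone sketch):

#include <cassert>
#include <vector>

int main() {
  std::vector<int> sorted = {1, 1, 2, 2, 2, 5};
  std::vector<int> sorted_indices = {4, 1, 0, 3, 5, 2};  // pre-sort positions

  std::vector<int> counts = {1};
  std::vector<int> first_index = {sorted_indices[0]};
  size_t result = 0;
  for (size_t i = 1; i < sorted.size(); ++i) {
    if (sorted[i] != sorted[result]) {
      sorted[++result] = sorted[i];  // compact in place
      counts.push_back(1);
      first_index.push_back(sorted_indices[i]);
    } else {
      counts.back() += 1;
    }
  }
  sorted.resize(result + 1);
  assert((sorted == std::vector<int>{1, 2, 5}));
  assert((counts == std::vector<int>{2, 3, 1}));
  assert((first_index == std::vector<int>{4, 0, 2}));
  return 0;
}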
template <typename Context, typename InT, typename IndexT>
static void UniqueDim(const Context& context,
                      const DenseTensor& in,
                      DenseTensor* out,
                      DenseTensor* indices,
                      DenseTensor* index,
                      DenseTensor* count,
                      bool return_index,
                      bool return_inverse,
                      bool return_counts,
                      int axis) {
  // transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
  std::vector<int> permute(in.dims().size());
  std::iota(permute.begin(), permute.end(), 0);
  permute[axis] = 0;
  permute[0] = axis;
  std::vector<int64_t> in_trans_dims_vec(phi::vectorize(in.dims()));
  in_trans_dims_vec[axis] = in.dims()[0];
  in_trans_dims_vec[0] = in.dims()[axis];
  DenseTensor in_trans;
  phi::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec);
  in_trans.Resize(in_trans_dims);
  context.template Alloc<InT>(&in_trans);
  TransCompute<Context, InT>(in.dims().size(), context, in, &in_trans, permute);

  // reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
  phi::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
  in_trans.Resize(in_trans_flat_dims);

  // sort indices
  std::vector<IndexT> sorted_indices_vec(in_trans.dims()[0]);
  std::iota(sorted_indices_vec.begin(), sorted_indices_vec.end(), 0);
  int64_t col = in_trans.dims()[1];
  const InT* in_trans_data = in_trans.data<InT>();
  std::sort(sorted_indices_vec.begin(),
            sorted_indices_vec.end(),
            [&](int64_t a, int64_t b) -> bool {
              for (int64_t i = 0; i < col; ++i) {
                InT lhs = in_trans_data[i + a * col];
                InT rhs = in_trans_data[i + b * col];
                if (lhs < rhs) {
                  return true;
                } else if (lhs > rhs) {
                  return false;
                }
              }
              return false;
            });

  // sort tensor according to indices
  DenseTensor input_sorted;
  input_sorted.Resize(in_trans_dims);
  context.template Alloc<InT>(&input_sorted);
  InT* input_sorted_data = input_sorted.data<InT>();
  for (size_t i = 0; i < sorted_indices_vec.size(); ++i) {
    memcpy(input_sorted_data + i * col,
           in_trans_data + static_cast<int64_t>(sorted_indices_vec[i]) * col,
           col * sizeof(InT));
  }

  std::vector<DenseTensor> input_unbind = Unbind(input_sorted);
  std::vector<IndexT> inverse_vec(sorted_indices_vec.size(), 0);
  std::vector<IndexT> counts_vec(sorted_indices_vec.size(), 0);
  std::vector<IndexT> indices_vec(sorted_indices_vec.size(), 0);
  auto last = UniqueDimImpl<Context, std::vector<DenseTensor>::iterator, InT>(
      context,
      input_unbind.begin(),
      input_unbind.end(),
      sorted_indices_vec,
      &inverse_vec,
      &counts_vec,
      &indices_vec);
  input_unbind.erase(last, input_unbind.end());
  counts_vec.erase(counts_vec.begin() + input_unbind.size(), counts_vec.end());
  indices_vec.erase(indices_vec.begin() + input_unbind.size(),
                    indices_vec.end());

  phi::funcs::ConcatFunctor<Context, InT> concat_functor;
  DenseTensor out_trans;
  std::vector<int64_t> out_trans_dims_vec = in_trans_dims_vec;
  out_trans_dims_vec[0] = input_unbind.size();
  out_trans.Resize(phi::make_ddim(out_trans_dims_vec));
  context.template Alloc<InT>(&out_trans);
  std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
  out->Resize(phi::make_ddim(out_trans_dims_vec));
  context.template Alloc<InT>(out);
  concat_functor(context, input_unbind, 0, &out_trans);
  TransCompute<Context, InT>(
      out_trans.dims().size(), context, out_trans, out, permute);

  if (return_inverse) {
    paddle::framework::TensorFromVector(inverse_vec, context, index);
  }
  if (return_counts) {
    paddle::framework::TensorFromVector(counts_vec, context, count);
  }
  if (return_index) {
    paddle::framework::TensorFromVector(indices_vec, context, indices);
  }
}
template <typename Context, typename InT>
struct UniqueFlattendTensorFunctor {
  const Context& ctx_;
  const DenseTensor& in_;
  DenseTensor* out_;
  DenseTensor* indices_;
  DenseTensor* index_;
  DenseTensor* count_;
  const bool return_index_;
  const bool return_inverse_;
  const bool return_counts_;

  UniqueFlattendTensorFunctor(const Context& context,
                              const DenseTensor& in,
                              DenseTensor* out,
                              DenseTensor* indices,
                              DenseTensor* index,
                              DenseTensor* count,
                              bool return_index,
                              bool return_inverse,
                              bool return_counts)
      : ctx_(context),
        in_(in),
        out_(out),
        indices_(indices),
        index_(index),
        count_(count),
        return_index_(return_index),
        return_inverse_(return_inverse),
        return_counts_(return_counts) {}

  template <typename IndexT>
  void apply() const {
    UniqueFlattendTensor<Context, InT, IndexT>(ctx_,
                                               in_,
                                               out_,
                                               indices_,
                                               index_,
                                               count_,
                                               return_index_,
                                               return_inverse_,
                                               return_counts_);
  }
};
template <typename Context, typename InT>
struct UniqueDimFunctor {
  const Context& ctx_;
  const DenseTensor& in_;
  DenseTensor* out_;
  DenseTensor* indices_;
  DenseTensor* index_;
  DenseTensor* count_;
  const int axis_;
  const bool return_index_;
  const bool return_inverse_;
  const bool return_counts_;

  UniqueDimFunctor(const Context& context,
                   const DenseTensor& in,
                   DenseTensor* out,
                   DenseTensor* indices,
                   DenseTensor* index,
                   DenseTensor* count,
                   const int axis,
                   bool return_index,
                   bool return_inverse,
                   bool return_counts)
      : ctx_(context),
        in_(in),
        out_(out),
        indices_(indices),
        index_(index),
        count_(count),
        axis_(axis),
        return_index_(return_index),
        return_inverse_(return_inverse),
        return_counts_(return_counts) {}

  template <typename IndexT>
  void apply() const {
    UniqueDim<Context, InT, IndexT>(ctx_,
                                    in_,
                                    out_,
                                    indices_,
                                    index_,
                                    count_,
                                    return_index_,
                                    return_inverse_,
                                    return_counts_,
                                    axis_);
  }
};
}  // namespace funcs
}  // namespace phi
paddle/phi/kernels/gpu/range_kernel.cu
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/range_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/funcs/range_function.h"
namespace phi {

template <typename T>
__global__ void Range(T start, T step, int64_t size, T* out) {
  CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}

template <typename T, typename Context>
void RangeKernel(const Context& dev_ctx,
                 const DenseTensor& start,
                 const DenseTensor& end,
                 const DenseTensor& step,
                 DenseTensor* out) {
  T start_value = start.data<T>()[0];
  T end_value = end.data<T>()[0];
  T step_value = step.data<T>()[0];
  int64_t size = 0;
  phi::funcs::GetSize(start_value, end_value, step_value, &size);
  out->Resize(phi::make_ddim({size}));
  T* out_data = dev_ctx.template Alloc<T>(out);

  auto stream = dev_ctx.stream();
  int block = std::min(size, static_cast<int64_t>(256));
  int grid = (size + block - 1) / block;
  Range<T><<<grid, block, 0, stream>>>(start_value, step_value, size, out_data);
}

}  // namespace phi
PD_REGISTER_KERNEL(
    range, GPU, ALL_LAYOUT, phi::RangeKernel, float, double, int64_t, int) {
  kernel->InputAt(0).SetBackend(phi::Backend::CPU);
  kernel->InputAt(1).SetBackend(phi::Backend::CPU);
  kernel->InputAt(2).SetBackend(phi::Backend::CPU);
}
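Note that the registration pins all three scalar inputs (start, end, step) to the CPU backend; this matches the kernel body, which reads their values on the host before computing the output size and launch shape. The launch arithmetic itself, restated standalone (a sketch, not phi code):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  int64_t size = 1000;
  int block = std::min(size, static_cast<int64_t>(256));  // capped at 256 threads
  int grid = (size + block - 1) / block;                  // ceil(1000 / 256) = 4
  assert(block == 256 && grid == 4);
  return 0;
}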
paddle/phi/kernels/gpu/stack_grad_kernel.cu
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/stack_grad_kernel.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {

template <typename T, typename IntType>
__global__ void UnStackHelperCUDAKernel(const T* __restrict__ input,
                                        int pre_dim_size,
                                        int split_dim_size,
                                        int suf_dim_size,
                                        int num_split,
                                        T** output_ptrs) {
  assert(blockDim.y == 1);
  assert(blockDim.z == 1);
  // In this case they are equal
  assert(split_dim_size % num_split == 0);

  IntType size = pre_dim_size * split_dim_size * suf_dim_size;
  IntType each_dim_size = split_dim_size / num_split;

  for (IntType offset = blockIdx.x * blockDim.x + threadIdx.x; offset < size;
       offset += blockDim.x * gridDim.x) {
    IntType i = offset / (split_dim_size * suf_dim_size);
    IntType j = (offset % (split_dim_size * suf_dim_size)) / suf_dim_size;
    IntType k = offset % suf_dim_size;

    T* output = output_ptrs[j / each_dim_size];
    if (output == nullptr) {
      return;
    }
    IntType output_ind = i * each_dim_size * suf_dim_size +
                         (j % each_dim_size) * suf_dim_size + k;
    *(output + output_ind) = input[offset];
  }
}
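The kernel treats the gradient as a (pre, split, suf) volume: each flat offset decomposes into (i, j, k), j selects the destination tensor via j / each_dim_size, and the remainder re-linearizes into that tensor's (pre, each, suf) layout. A host-side restatement with small dimensions (a sketch, not phi code):

#include <cassert>

int main() {
  const int split = 6, suf = 5, num_split = 3;
  const int each = split / num_split;  // 2
  const int offset = 1 * (split * suf) + 4 * suf + 3;  // encodes i=1, j=4, k=3
  const int i = offset / (split * suf);
  const int j = (offset % (split * suf)) / suf;
  const int k = offset % suf;
  assert(i == 1 && j == 4 && k == 3);
  const int which_out = j / each;                             // output tensor 2
  const int out_ind = i * each * suf + (j % each) * suf + k;  // index inside it
  assert(which_out == 2 && out_ind == 13);
  return 0;
}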
template <typename T, typename Context>
void StackGradKernel(const Context& dev_ctx,
                     const DenseTensor& out,
                     int axis,
                     std::vector<DenseTensor*> x_grad) {
  if (axis < 0) axis += out.dims().size();

  int n = out.dims()[axis];
  PADDLE_ENFORCE_EQ(n,
                    x_grad.size(),
                    phi::errors::InvalidArgument(
                        "Output x_grad size should be equal to n, but"
                        " received n is:%d x_grad size is:%d.",
                        n,
                        x_grad.size()));

  // x_grad is output, so save each data address, then copy each dy into dx_data
  std::vector<T*> outputs(n);
  for (size_t j = 0; j < x_grad.size(); ++j) {
    if (x_grad[j] == nullptr) {
      outputs[j] = nullptr;
      continue;
    }
    if (x_grad[j]->numel() != 0UL) {
      T* ptr = dev_ctx.template Alloc<T>(x_grad[j]);
      outputs[j] = ptr;
    } else {
      outputs[j] = nullptr;
    }
  }
  auto dy_data = out.data<T>();
  // each x_grad should have same shape
  int dy_pre = 1, dy_suf = 1;
  auto dy_dims = out.dims();
  int split_dim = n;
  for (int i = 0; i < axis; ++i) {
    dy_pre *= dy_dims[i];
  }
  dy_suf = out.numel() / (split_dim * dy_pre);

  auto tmp_out_data =
      paddle::memory::Alloc(dev_ctx, outputs.size() * sizeof(T*));
  paddle::memory::Copy(dev_ctx.GetPlace(),
                       tmp_out_data->ptr(),
                       phi::CPUPlace(),
                       reinterpret_cast<void*>(outputs.data()),
                       outputs.size() * sizeof(T*),
                       dev_ctx.stream());

  auto config = phi::backends::gpu::GetGpuLaunchConfig1D(
      dev_ctx, dy_pre * split_dim * dy_suf);

  if (out.numel() < std::numeric_limits<int32_t>::max()) {
    UnStackHelperCUDAKernel<T, int32_t><<<config.block_per_grid.x,
                                          config.thread_per_block.x,
                                          0,
                                          dev_ctx.stream()>>>(
        dy_data,
        dy_pre,
        split_dim,
        dy_suf,
        split_dim,
        reinterpret_cast<T**>(tmp_out_data->ptr()));
  } else {
    UnStackHelperCUDAKernel<T, int64_t><<<config.block_per_grid.x,
                                          config.thread_per_block.x,
                                          0,
                                          dev_ctx.stream()>>>(
        dy_data,
        dy_pre,
        split_dim,
        dy_suf,
        split_dim,
        reinterpret_cast<T**>(tmp_out_data->ptr()));
  }
}

}  // namespace phi
PD_REGISTER_KERNEL(stack_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::StackGradKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
paddle/phi/kernels/gpu/stack_kernel.cu
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/stack_kernel.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {

template <typename T, typename IntType>
__global__ void StackCUDAKernel(T** input_ptrs,
                                int split_size,
                                int rows,
                                int cols,
                                T* __restrict__ output) {
  IntType grid_x = blockIdx.x * blockDim.x + threadIdx.x;

  for (; grid_x < cols; grid_x += blockDim.x * gridDim.x) {
    IntType grid_y = blockIdx.y * blockDim.y + threadIdx.y;

    IntType split = grid_x / split_size;
    const T* input_ptr = input_ptrs[split];
    IntType col_offset = grid_x % split_size;
#pragma unroll
    for (; grid_y < rows; grid_y += blockDim.y * gridDim.y) {
      output[grid_y * cols + grid_x] =
          input_ptr[grid_y * split_size + col_offset];
    }
  }
}
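Each output column grid_x belongs to input tensor grid_x / split_size at local column grid_x % split_size; rows are walked by the inner grid-stride loop. The column mapping, restated on the host (a sketch, not phi code):

#include <cassert>

int main() {
  const int split_size = 3, n = 4;   // 4 inputs, 3 columns each
  const int cols = split_size * n;   // 12 output columns
  const int grid_x = 7, grid_y = 1;  // output coordinate (row 1, col 7)
  const int split = grid_x / split_size;       // -> input tensor 2
  const int col_offset = grid_x % split_size;  // -> its local column 1
  assert(split == 2 && col_offset == 1);
  assert(grid_y * cols + grid_x == 19);           // output flat index
  assert(grid_y * split_size + col_offset == 4);  // input flat index
  return 0;
}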
template <typename T, typename Context>
void StackKernel(const Context& dev_ctx,
                 const std::vector<const DenseTensor*>& x,
                 int axis,
                 DenseTensor* out) {
  if (axis < 0) axis += (x[0]->dims().size() + 1);

  int n = static_cast<int>(x.size());
  T* y_data = dev_ctx.template Alloc<T>(out);
  std::vector<const T*> x_datas(n);
  for (int i = 0; i < n; i++) {
    x_datas[i] = x[i]->data<T>();
  }

  auto tmp_x_data = paddle::memory::Alloc(dev_ctx, x_datas.size() * sizeof(T*));
  paddle::memory::Copy(dev_ctx.GetPlace(),
                       tmp_x_data->ptr(),
                       phi::CPUPlace(),
                       reinterpret_cast<void*>(x_datas.data()),
                       x_datas.size() * sizeof(T*),
                       dev_ctx.stream());

  // Split x dim from axis to matrix
  int x_row = 1, x_col = 1;
  for (int i = 0; i < axis; ++i) {
    x_row *= x[0]->dims()[i];
  }
  x_col = x[0]->numel() / x_row;
  int out_col = x_col * n;

  auto config = phi::backends::gpu::GetGpuLaunchConfig2D(dev_ctx, out_col, x_row);

  if (out->numel() < std::numeric_limits<int32_t>::max()) {
    StackCUDAKernel<T, int32_t><<<config.block_per_grid,
                                  config.thread_per_block,
                                  0,
                                  dev_ctx.stream()>>>(
        reinterpret_cast<T**>(tmp_x_data->ptr()), x_col, x_row, out_col, y_data);
  } else {
    StackCUDAKernel<T, int64_t><<<config.block_per_grid,
                                  config.thread_per_block,
                                  0,
                                  dev_ctx.stream()>>>(
        reinterpret_cast<T**>(tmp_x_data->ptr()), x_col, x_row, out_col, y_data);
  }
}

}  // namespace phi
PD_REGISTER_KERNEL(stack,
                   GPU,
                   ALL_LAYOUT,
                   phi::StackKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
paddle/fluid/operators/unique_op.cu → paddle/phi/kernels/gpu/unique_kernel.cu
This diff is collapsed.
paddle/phi/kernels/gpu/unstack_grad_kernel.cu
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unstack_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/unstack_grad_kernel_impl.h"
PD_REGISTER_KERNEL(unstack_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::UnStackGradKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   phi::dtype::float16) {}
paddle/phi/kernels/gpu/unstack_kernel.cu
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unstack_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/unstack_kernel_impl.h"
PD_REGISTER_KERNEL(unstack,
                   GPU,
                   ALL_LAYOUT,
                   phi::UnStackKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   phi::dtype::float16) {}
paddle/phi/kernels/impl/unstack_grad_kernel_impl.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/stack_functor.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/device_vector.h>
#endif
namespace phi {

template <typename T, typename Context>
void UnStackGradKernel(const Context& dev_ctx,
                       const std::vector<const DenseTensor*>& x,
                       int axis,
                       DenseTensor* x_grad) {
  if (axis < 0) axis += (x[0]->dims().size() + 1);
  int n = static_cast<int>(x.size());
  auto* x_grad_data = dev_ctx.template Alloc<T>(x_grad);
  std::vector<const T*> x_datas(n);
  for (int i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();

  int pre = 1;
  int post = 1;
  auto& dim = x[0]->dims();
  for (auto i = 0; i < axis; ++i) pre *= dim[i];
  for (auto i = axis; i < dim.size(); ++i) post *= dim[i];

#if defined(__NVCC__) || defined(__HIPCC__)
  int total_num = pre * n * post;

  thrust::device_vector<const T*> device_x_vec(x_datas);
  auto x_data_arr = device_x_vec.data().get();

  phi::funcs::StackFunctorForRange(
      dev_ctx, x_data_arr, x_grad_data, total_num, n, post);

  // Wait() must be called because device_x_vec may be destructed before
  // kernel ends
  dev_ctx.Wait();
#else
  auto x_data_arr = x_datas.data();

  size_t x_offset = 0;
  size_t y_offset = 0;
  for (int i = 0; i < pre; i++) {
    for (int j = 0; j < n; j++) {
      std::memcpy(
          x_grad_data + y_offset, x_data_arr[j] + x_offset, post * sizeof(T));
      y_offset += post;
    }
    x_offset += post;
  }
#endif
}

}  // namespace phi
paddle/phi/kernels/impl/unstack_kernel_impl.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/stack_functor.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/device_vector.h>
#endif
namespace phi {

template <typename T, typename Context>
void UnStackKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   int axis,
                   int num,
                   std::vector<DenseTensor*> outs) {
  auto* dy = &x;
  auto dx = outs;
  if (axis < 0) axis += dy->dims().size();
  int n = dy->dims()[axis];

  std::vector<T*> dx_datas(n);  // NOLINT
  for (int i = 0; i < n; i++) {
    dx_datas[i] = dev_ctx.template Alloc<T>(dx[i]);
  }
  auto dy_data = dy->data<T>();
  if (dy->numel() == 0) return;
  int pre = 1;
  for (int i = 0; i < axis; ++i) pre *= dy->dims()[i];
  int total_num = dy->numel();
  int post = total_num / (n * pre);

#if defined(__NVCC__) || defined(__HIPCC__)
  thrust::device_vector<T*> device_dx_vec(dx_datas);
  auto dx_data_arr = device_dx_vec.data().get();
#else
  auto dx_data_arr = dx_datas.data();
#endif
  phi::funcs::StackGradFunctorForRange(
      dev_ctx, dx_data_arr, dy_data, total_num, n, post);
#if defined(__NVCC__) || defined(__HIPCC__)
  // Wait() must be called because device_dx_vec may be destructed before
  // kernel ends
  dev_ctx.Wait();
#endif
}

}  // namespace phi
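Both unstack impl headers use the same trick on GPU: the host-side table of data pointers is copied into a thrust::device_vector so a single kernel argument can address every tensor, and dev_ctx.Wait() keeps that temporary alive until the kernel finishes. A minimal standalone sketch of the marshaling step (requires nvcc/hipcc; illustrative only, not the phi API):

#include <thrust/device_vector.h>

#include <cassert>
#include <vector>

int main() {
  thrust::device_vector<float> a(4), b(4);
  std::vector<float*> host_table = {a.data().get(), b.data().get()};
  thrust::device_vector<float*> device_table(host_table);  // H2D pointer table
  float** kernel_arg = device_table.data().get();  // pass to a __global__ kernel
  assert(kernel_arg != nullptr);
  // device_table must outlive the launch; the impls above call dev_ctx.Wait()
  // before it goes out of scope for exactly that reason.
  return 0;
}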
paddle/phi/kernels/range_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void RangeKernel(const Context& dev_ctx,
                 const DenseTensor& start,
                 const DenseTensor& end,
                 const DenseTensor& step,
                 DenseTensor* out);

}  // namespace phi
paddle/phi/kernels/stack_grad_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void StackGradKernel(const Context& dev_ctx,
                     const DenseTensor& out,
                     int axis,
                     std::vector<DenseTensor*> x_grad);

}  // namespace phi
paddle/phi/kernels/stack_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void StackKernel(const Context& dev_ctx,
                 const std::vector<const DenseTensor*>& x,
                 int axis,
                 DenseTensor* out);

}  // namespace phi
paddle/phi/kernels/unique_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void UniqueKernel(const Context& dev_ctx,
                  const DenseTensor& x,
                  bool return_index,
                  bool return_inverse,
                  bool return_counts,
                  const std::vector<int>& axis,
                  DataType dtype,
                  DenseTensor* out,
                  DenseTensor* indices,
                  DenseTensor* index,
                  DenseTensor* counts);

template <typename T, typename Context>
void UniqueRawKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     bool return_index,
                     bool return_inverse,
                     bool return_counts,
                     const std::vector<int>& axis,
                     DataType dtype,
                     bool is_sorted,
                     DenseTensor* out,
                     DenseTensor* indices,
                     DenseTensor* index,
                     DenseTensor* counts);

}  // namespace phi
paddle/phi/kernels/unstack_grad_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void UnStackGradKernel(const Context& dev_ctx,
                       const std::vector<const DenseTensor*>& x,
                       int axis,
                       DenseTensor* x_grad);

}  // namespace phi
paddle/phi/kernels/unstack_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {

template <typename T, typename Context>
void UnStackKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   int axis,
                   int num,
                   std::vector<DenseTensor*> outs);

}  // namespace phi
paddle/phi/ops/compat/stack_sig.cc
0 → 100644
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {

KernelSignature StackGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "stack_grad", {GradVarName("Y")}, {"axis"}, {GradVarName("X")});
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(stack_grad, phi::StackGradOpArgumentMapping);
paddle/phi/ops/compat/unique_sig.cc
0 → 100644
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {

KernelSignature UniqueOpArgumentMapping(const ArgumentMappingContext& ctx) {
  bool is_sorted = paddle::any_cast<bool>(ctx.Attr("is_sorted"));
  if (is_sorted) {
    return KernelSignature(
        "unique",
        {"X"},
        {"return_index", "return_inverse", "return_counts", "axis", "dtype"},
        {"Out", "Indices", "Index", "Counts"});
  } else {
    return KernelSignature("unique_raw",
                           {"X"},
                           {"return_index",
                            "return_inverse",
                            "return_counts",
                            "axis",
                            "dtype",
                            "is_sorted"},
                           {"Out", "Indices", "Index", "Counts"});
  }
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(unique, phi::UniqueOpArgumentMapping);
paddle/phi/ops/compat/unstack_sig.cc
0 → 100644
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {

KernelSignature UnStackGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "unstack_grad", {GradVarName("Y")}, {"axis"}, {GradVarName("X")});
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(unstack_grad, phi::UnStackGradOpArgumentMapping);