Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
64538c8d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
64538c8d
编写于
1月 04, 2022
作者:
C
Chen Weihang
提交者:
GitHub
1月 04, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[PTen] Move inner empty and cast api to kernel.h (#38587)
* move inner cast api to cast_kernel.h * resolve conflict
上级
59888bba
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
86 additions
and
76 deletions
+86
-76
paddle/fluid/operators/cast_op.h
paddle/fluid/operators/cast_op.h
+1
-1
paddle/pten/include/creation.h
paddle/pten/include/creation.h
+0
-31
paddle/pten/include/manipulation.h
paddle/pten/include/manipulation.h
+0
-13
paddle/pten/kernels/CMakeLists.txt
paddle/pten/kernels/CMakeLists.txt
+2
-0
paddle/pten/kernels/cast_kernel.h
paddle/pten/kernels/cast_kernel.h
+16
-5
paddle/pten/kernels/cpu/cast_kernel.cc
paddle/pten/kernels/cpu/cast_kernel.cc
+6
-6
paddle/pten/kernels/empty_kernel.cc
paddle/pten/kernels/empty_kernel.cc
+8
-8
paddle/pten/kernels/empty_kernel.h
paddle/pten/kernels/empty_kernel.h
+45
-4
paddle/pten/kernels/gpu/cast_kernel.cu
paddle/pten/kernels/gpu/cast_kernel.cu
+6
-6
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+1
-1
paddle/pten/kernels/hybird/general/reduce_impl.h
paddle/pten/kernels/hybird/general/reduce_impl.h
+1
-1
未找到文件。
paddle/fluid/operators/cast_op.h
浏览文件 @
64538c8d
...
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
...
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
out_dtype
));
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
out_dtype
));
// call new kernel
// call new kernel
pten
::
Cast
<
InT
>
(
dev_ctx
,
*
pt_x
.
get
(),
pt_out_dtype
,
pt_out
.
get
());
pten
::
Cast
Kernel
<
InT
>
(
dev_ctx
,
*
pt_x
.
get
(),
pt_out_dtype
,
pt_out
.
get
());
}
}
};
};
...
...
paddle/pten/include/creation.h
浏览文件 @
64538c8d
...
@@ -23,37 +23,6 @@ namespace pten {
...
@@ -23,37 +23,6 @@ namespace pten {
// TODO(YuanRisheng) This function name should be same as User API name.
// TODO(YuanRisheng) This function name should be same as User API name.
// TODO(zyfncg) Automatic code generation
// TODO(zyfncg) Automatic code generation
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Empty
(
const
ContextT
&
dev_ctx
,
const
ScalarArray
&
shape
,
DataType
dtype
=
DataType
::
FLOAT32
,
Backend
backend
=
Backend
::
CPU
,
// Is backend needed here?
DataLayout
layout
=
DataLayout
::
NCHW
)
{
auto
out_meta
=
CreateInferMeta
(
shape
,
dtype
,
layout
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
()),
std
::
move
(
out_meta
));
Empty
<
T
,
ContextT
>
(
dev_ctx
,
shape
,
&
dense_out
);
return
dense_out
;
}
template
<
typename
T
,
typename
ContextT
>
DenseTensor
EmptyLike
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
DataType
dtype
=
DataType
::
UNDEFINED
,
Backend
backend
=
Backend
::
UNDEFINED
,
// Is backend needed here?
DataLayout
layout
=
DataLayout
::
UNDEFINED
)
{
auto
out_meta
=
CreateLikeInferMeta
(
x
.
meta
(),
dtype
,
layout
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
()),
std
::
move
(
out_meta
));
EmptyLike
<
T
,
ContextT
>
(
dev_ctx
,
&
dense_out
);
return
dense_out
;
}
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Full
(
const
ContextT
&
dev_ctx
,
DenseTensor
Full
(
const
ContextT
&
dev_ctx
,
const
ScalarArray
&
shape
,
const
ScalarArray
&
shape
,
...
...
paddle/pten/include/manipulation.h
浏览文件 @
64538c8d
...
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
...
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
return
dense_out
;
return
dense_out
;
}
}
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Cast
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
DataType
out_dtype
)
{
auto
out_meta
=
CastInferMeta
(
x
.
meta
(),
out_dtype
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
()),
std
::
move
(
out_meta
));
Cast
<
T
,
ContextT
>
(
dev_ctx
,
x
,
out_dtype
,
&
dense_out
);
return
dense_out
;
}
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Reshape
(
const
ContextT
&
dev_ctx
,
DenseTensor
Reshape
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
x
,
...
...
paddle/pten/kernels/CMakeLists.txt
浏览文件 @
64538c8d
...
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
...
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
set
(
COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils
)
set
(
COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils
)
set
(
COMMON_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
eigen_function blas
)
set
(
COMMON_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
eigen_function blas
)
# remove this dep after removing fluid deps on tensor creation
set
(
COMMON_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
pten_api_utils
)
set
(
COMMON_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
infermeta
)
set
(
COMMON_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
infermeta
)
set
(
MATH_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
cast_kernel copy_kernel pten_transpose_cpu
)
set
(
MATH_KERNEL_DEPS
${
COMMON_KERNEL_DEPS
}
cast_kernel copy_kernel pten_transpose_cpu
)
...
...
paddle/pten/kernels/cast_kernel.h
浏览文件 @
64538c8d
...
@@ -15,13 +15,24 @@ limitations under the License. */
...
@@ -15,13 +15,24 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/empty_kernel.h"
namespace
pten
{
namespace
pten
{
template
<
typename
T
,
typename
Context
T
>
template
<
typename
T
,
typename
Context
>
void
Cast
(
const
ContextT
&
dev_ctx
,
void
Cast
Kernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
x
,
DataType
out_dtype
,
DataType
out_dtype
,
DenseTensor
*
out
);
DenseTensor
*
out
);
template
<
typename
T
,
typename
Context
>
DenseTensor
Cast
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
DataType
out_dtype
)
{
auto
out_meta
=
CastInferMeta
(
x
.
meta
(),
out_dtype
);
auto
dense_out
=
pten
::
Empty
<
T
,
Context
>
(
dev_ctx
,
std
::
move
(
out_meta
));
CastKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
out_dtype
,
&
dense_out
);
return
dense_out
;
}
}
// namespace pten
}
// namespace pten
paddle/pten/kernels/cpu/cast_kernel.cc
浏览文件 @
64538c8d
...
@@ -46,8 +46,8 @@ void CastKernelImpl(const CPUContext& dev_ctx,
...
@@ -46,8 +46,8 @@ void CastKernelImpl(const CPUContext& dev_ctx,
CastOpTransformFunctor
<
InT
,
OutT
>
());
CastOpTransformFunctor
<
InT
,
OutT
>
());
}
}
template
<
typename
T
,
typename
Context
T
>
template
<
typename
T
,
typename
Context
>
void
Cast
(
const
ContextT
&
dev_ctx
,
void
Cast
Kernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
x
,
DataType
out_dtype
,
DataType
out_dtype
,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
...
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
...
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
PT_REGISTER_CTX_KERNEL
(
cast
,
PT_REGISTER_CTX_KERNEL
(
cast
,
CPU
,
CPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
pten
::
Cast
,
pten
::
Cast
Kernel
,
float
,
float
,
double
,
double
,
int
,
int
,
...
...
paddle/pten/kernels/empty_kernel.cc
浏览文件 @
64538c8d
...
@@ -20,14 +20,14 @@ limitations under the License. */
...
@@ -20,14 +20,14 @@ limitations under the License. */
namespace
pten
{
namespace
pten
{
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
ContextT
>
void
Empty
(
const
ContextT
&
dev_ctx
,
void
Empty
Kernel
(
const
ContextT
&
dev_ctx
,
const
ScalarArray
&
shape
,
const
ScalarArray
&
shape
,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
out
->
Resize
(
paddle
::
framework
::
make_ddim
(
shape
.
GetData
()));
out
->
Resize
(
paddle
::
framework
::
make_ddim
(
shape
.
GetData
()));
}
}
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
ContextT
>
void
EmptyLike
(
const
ContextT
&
dev_ctx
,
DenseTensor
*
out
)
{
void
EmptyLike
Kernel
(
const
ContextT
&
dev_ctx
,
DenseTensor
*
out
)
{
out
->
mutable_data
<
T
>
();
out
->
mutable_data
<
T
>
();
}
}
...
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
...
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
PT_REGISTER_CTX_KERNEL
(
empty
,
PT_REGISTER_CTX_KERNEL
(
empty
,
CPU
,
CPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
pten
::
Empty
,
pten
::
Empty
Kernel
,
bool
,
bool
,
int
,
int
,
int64_t
,
int64_t
,
...
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
...
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
PT_REGISTER_CTX_KERNEL
(
empty_like
,
PT_REGISTER_CTX_KERNEL
(
empty_like
,
CPU
,
CPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
pten
::
EmptyLike
,
pten
::
EmptyLike
Kernel
,
bool
,
bool
,
int
,
int
,
int64_t
,
int64_t
,
...
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
...
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
PT_REGISTER_CTX_KERNEL
(
empty
,
PT_REGISTER_CTX_KERNEL
(
empty
,
GPU
,
GPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
pten
::
Empty
,
pten
::
Empty
Kernel
,
bool
,
bool
,
int
,
int
,
int64_t
,
int64_t
,
...
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
...
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
PT_REGISTER_CTX_KERNEL
(
empty_like
,
PT_REGISTER_CTX_KERNEL
(
empty_like
,
GPU
,
GPU
,
ALL_LAYOUT
,
ALL_LAYOUT
,
pten
::
EmptyLike
,
pten
::
EmptyLike
Kernel
,
bool
,
bool
,
int
,
int
,
int64_t
,
int64_t
,
...
...
paddle/pten/kernels/empty_kernel.h
浏览文件 @
64538c8d
...
@@ -14,15 +14,56 @@
...
@@ -14,15 +14,56 @@
#pragma once
#pragma once
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/infermeta/nary.h"
#include "paddle/pten/infermeta/unary.h"
namespace
pten
{
namespace
pten
{
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
Context
>
void
Empty
(
const
ContextT
&
dev_ctx
,
const
ScalarArray
&
shape
,
DenseTensor
*
out
);
void
EmptyKernel
(
const
Context
&
dev_ctx
,
const
ScalarArray
&
shape
,
DenseTensor
*
out
);
template
<
typename
T
,
typename
ContextT
>
template
<
typename
T
,
typename
Context
>
void
EmptyLike
(
const
ContextT
&
dev_ctx
,
DenseTensor
*
out
);
void
EmptyLikeKernel
(
const
Context
&
dev_ctx
,
DenseTensor
*
out
);
// TODO(chenweihang): the tensor creation method need to be replaced later,
// all kernel api call Empty here instead of making tensor self
template
<
typename
T
,
typename
Context
>
DenseTensor
Empty
(
const
Context
&
dev_ctx
,
DenseTensorMeta
&&
meta
)
{
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
()),
std
::
move
(
meta
));
return
dense_out
;
}
template
<
typename
T
,
typename
Context
>
DenseTensor
Empty
(
const
Context
&
dev_ctx
,
const
ScalarArray
&
shape
,
DataType
dtype
=
DataType
::
FLOAT32
,
Backend
backend
=
Backend
::
CPU
,
// Is backend needed here?
DataLayout
layout
=
DataLayout
::
NCHW
)
{
auto
out_meta
=
CreateInferMeta
(
shape
,
dtype
,
layout
);
auto
dense_out
=
Empty
<
T
,
Context
>
(
dev_ctx
,
std
::
move
(
out_meta
));
EmptyKernel
<
T
,
Context
>
(
dev_ctx
,
shape
,
&
dense_out
);
return
dense_out
;
}
template
<
typename
T
,
typename
Context
>
DenseTensor
EmptyLike
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
DataType
dtype
=
DataType
::
UNDEFINED
,
Backend
backend
=
Backend
::
UNDEFINED
,
// Is backend needed here?
DataLayout
layout
=
DataLayout
::
UNDEFINED
)
{
auto
out_meta
=
CreateLikeInferMeta
(
x
.
meta
(),
dtype
,
layout
);
auto
dense_out
=
Empty
<
T
,
Context
>
(
dev_ctx
,
std
::
move
(
out_meta
));
EmptyLikeKernel
<
T
,
Context
>
(
dev_ctx
,
&
dense_out
);
return
dense_out
;
}
}
// namespace pten
}
// namespace pten
paddle/pten/kernels/gpu/cast_kernel.cu
浏览文件 @
64538c8d
...
@@ -85,8 +85,8 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
...
@@ -85,8 +85,8 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
CastCUDAKernelImplWithPtr
(
dev_ctx
,
in_data
,
out_data
,
size
);
CastCUDAKernelImplWithPtr
(
dev_ctx
,
in_data
,
out_data
,
size
);
}
}
template
<
typename
T
,
typename
Context
T
>
template
<
typename
T
,
typename
Context
>
void
Cast
(
const
ContextT
&
dev_ctx
,
void
Cast
Kernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
x
,
DataType
out_dtype
,
DataType
out_dtype
,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
...
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
...
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
PT_REGISTER_CTX_KERNEL(cast, \
PT_REGISTER_CTX_KERNEL(cast, \
GPU, \
GPU, \
ALL_LAYOUT, \
ALL_LAYOUT, \
pten::Cast
,
\
pten::Cast
Kernel,
\
float, \
float, \
double, \
double, \
int, \
int, \
...
...
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
浏览文件 @
64538c8d
...
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
...
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
AsyncCopy
(
x
,
y
);
AsyncCopy
(
x
,
y
);
y
->
Resize
(
out_dims
);
y
->
Resize
(
out_dims
);
}
else
{
}
else
{
pten
::
Cast
<
Tx
>
(
*
dev_ctx
,
x
,
y
->
dtype
(),
y
);
pten
::
Cast
Kernel
<
Tx
>
(
*
dev_ctx
,
x
,
y
->
dtype
(),
y
);
}
}
return
;
return
;
}
}
...
...
paddle/pten/kernels/hybird/general/reduce_impl.h
浏览文件 @
64538c8d
...
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
...
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
pten
::
DenseTensorMeta
(
out_dtype
,
x
.
dims
(),
x
.
layout
()));
pten
::
DenseTensorMeta
(
out_dtype
,
x
.
dims
(),
x
.
layout
()));
// cast x tensor to out_dtype
// cast x tensor to out_dtype
pten
::
Cast
<
T
,
DeviceContext
>
(
dev_ctx
,
x
,
out_dtype
,
&
tmp_tensor
);
pten
::
Cast
Kernel
<
T
,
DeviceContext
>
(
dev_ctx
,
x
,
out_dtype
,
&
tmp_tensor
);
// do reduce sum
// do reduce sum
PD_VISIT_ALL_TYPES
(
PD_VISIT_ALL_TYPES
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录