Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e78eb3f4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e78eb3f4
编写于
12月 14, 2021
作者:
C
Chen Weihang
提交者:
GitHub
12月 15, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
replace moves_storage and alloc_construct (#38134)
上级
49108efa
变更
11
隐藏空白更改
内联
并排
Showing
11 changed files
with
85 additions
and
90 deletions
+85
-90
paddle/fluid/framework/custom_operator.cc
paddle/fluid/framework/custom_operator.cc
+2
-2
paddle/pten/api/lib/utils.cc
paddle/pten/api/lib/utils.cc
+5
-5
paddle/pten/include/creation.h
paddle/pten/include/creation.h
+5
-5
paddle/pten/include/linalg.h
paddle/pten/include/linalg.h
+5
-5
paddle/pten/include/manipulation.h
paddle/pten/include/manipulation.h
+13
-13
paddle/pten/include/math.h
paddle/pten/include/math.h
+33
-33
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+3
-6
paddle/pten/kernels/hybird/eigen/reduce.h
paddle/pten/kernels/hybird/eigen/reduce.h
+4
-4
paddle/pten/kernels/hybird/general/reduce_impl.h
paddle/pten/kernels/hybird/general/reduce_impl.h
+2
-3
paddle/pten/tests/api/scale_api.h
paddle/pten/tests/api/scale_api.h
+8
-9
python/paddle/utils/code_gen/api_gen.py
python/paddle/utils/code_gen/api_gen.py
+5
-5
未找到文件。
paddle/fluid/framework/custom_operator.cc
浏览文件 @
e78eb3f4
...
...
@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"Tensors."
,
vec_true_outs
.
size
(),
outs
.
size
()));
for
(
size_t
j
=
0
;
j
<
vec_true_outs
.
size
();
++
j
)
{
experimental
::
MovesStorage
(
experimental
::
MovesSharedStorage
(
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
outs
.
at
(
j
).
impl
())
.
get
(),
vec_true_outs
.
at
(
j
));
}
}
else
{
auto
*
true_out
=
ctx
.
Output
<
Tensor
>
(
out_name
);
experimental
::
MovesStorage
(
experimental
::
MovesSharedStorage
(
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
outs
.
at
(
i
).
impl
())
.
get
(),
true_out
);
...
...
paddle/pten/api/lib/utils.cc
浏览文件 @
e78eb3f4
...
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
...
...
@@ -62,10 +62,10 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
auto
out_meta
=
UnchangedInferMeta
(
dense_x
->
meta
());
// 5. Prepare outputs
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
pten
::
TransToFluidPlace
(
backend
))
;
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
allocator
,
out_meta
);
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
pten
::
TransToFluidPlace
(
backend
))
,
std
::
move
(
out_meta
)
);
kernel_context
.
EmplaceBackOutput
(
dense_out
);
Tensor
out
;
out
.
set_impl
(
dense_out
);
...
...
paddle/pten/include/creation.h
浏览文件 @
e78eb3f4
...
...
@@ -14,7 +14,7 @@
#pragma once
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/creation.h"
#include "paddle/pten/kernels/cuda/creation.h"
...
...
@@ -32,10 +32,10 @@ DenseTensor FillAnyLike(
Backend
backend
=
Backend
::
UNDEFINED
,
// Is backend needed here?
DataLayout
layout
=
DataLayout
::
UNDEFINED
)
{
auto
out_meta
=
FullLikeInferMeta
(
x
.
meta
(),
dtype
,
layout
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
FillAnyLike
<
T
>
(
dev_ctx
,
val
,
&
dense_out
);
return
dense_out
;
}
...
...
paddle/pten/include/linalg.h
浏览文件 @
e78eb3f4
...
...
@@ -15,7 +15,7 @@
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/cuda/linalg.h"
...
...
@@ -27,10 +27,10 @@ DenseTensor Dot(const ContextT& dev_ctx,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
)
{
auto
out_meta
=
DotInferMeta
(
x
.
meta
(),
y
.
meta
());
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Dot
<
T
>
(
dev_ctx
,
x
,
y
,
&
dense_out
);
return
dense_out
;
}
...
...
paddle/pten/include/manipulation.h
浏览文件 @
e78eb3f4
...
...
@@ -15,7 +15,7 @@
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/kernels/cuda/manipulation.h"
...
...
@@ -29,10 +29,10 @@ DenseTensor Flatten(const ContextT& dev_ctx,
int
start_axis
,
int
stop_axis
)
{
auto
out_meta
=
FlattenInferMeta
(
x
.
meta
(),
start_axis
,
stop_axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Flatten
<
T
>
(
dev_ctx
,
x
,
start_axis
,
stop_axis
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -43,10 +43,10 @@ DenseTensor Cast(const ContextT& dev_ctx,
DataType
out_dtype
,
DataType
in_dtype
)
{
auto
out_meta
=
CastInferMeta
(
x
.
meta
(),
out_dtype
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Cast
<
T
>
(
dev_ctx
,
x
,
out_dtype
,
in_dtype
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -56,10 +56,10 @@ DenseTensor Reshape(const ContextT& dev_ctx,
const
DenseTensor
&
x
,
const
std
::
vector
<
int64_t
>&
shape
)
{
auto
out_meta
=
InferMetaFromVecValue
(
x
.
meta
(),
shape
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Reshape
(
dev_ctx
,
x
,
ScalarArray
(
shape
),
&
dense_out
);
return
dense_out
;
}
...
...
paddle/pten/include/math.h
浏览文件 @
e78eb3f4
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/math.h"
#include "paddle/pten/kernels/cuda/math.h"
...
...
@@ -25,10 +25,10 @@ namespace pten {
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Sign
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
)
{
auto
out_meta
=
UnchangedInferMeta
(
x
.
meta
());
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Sign
<
T
>
(
dev_ctx
,
x
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -39,10 +39,10 @@ DenseTensor Mean(const ContextT& dev_ctx,
const
std
::
vector
<
int64_t
>&
axis
,
bool
keep_dim
)
{
auto
out_meta
=
ReduceInferMeta
(
x
.
meta
(),
axis
,
keep_dim
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
bool
reduce_all
=
false
;
DataType
out_dtype
=
pten
::
DataType
::
UNDEFINED
;
Mean
<
T
>
(
...
...
@@ -57,10 +57,10 @@ DenseTensor Sum(const ContextT& dev_ctx,
DataType
dtype
,
bool
keep_dim
)
{
auto
out_meta
=
ReduceInferMeta
(
x
.
meta
(),
axis
,
keep_dim
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
out_meta
);
// The real value of reduce_all will be get in kernel
// so use default value(false) is OK.
...
...
@@ -82,10 +82,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
float
bias
,
bool
bias_after_scale
)
{
auto
out_meta
=
UnchangedInferMeta
(
x
.
meta
());
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
Scale
<
T
>
(
dev_ctx
,
x
,
scale
,
bias
,
bias_after_scale
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -96,10 +96,10 @@ DenseTensor Add(const ContextT& dev_ctx,
const
DenseTensor
&
y
,
int
axis
)
{
auto
out_meta
=
ElementwiseInferMeta
(
x
.
meta
(),
y
.
meta
(),
axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
ElementwiseAdd
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -110,10 +110,10 @@ DenseTensor Subtract(const ContextT& dev_ctx,
const
DenseTensor
&
y
,
int
axis
)
{
auto
out_meta
=
ElementwiseInferMeta
(
x
.
meta
(),
y
.
meta
(),
axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
ElementwiseSub
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -124,10 +124,10 @@ DenseTensor Divide(const ContextT& dev_ctx,
const
DenseTensor
&
y
,
int
axis
)
{
auto
out_meta
=
ElementwiseInferMeta
(
x
.
meta
(),
y
.
meta
(),
axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
ElementwiseDiv
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
&
dense_out
);
return
dense_out
;
}
...
...
@@ -138,10 +138,10 @@ DenseTensor Multiply(const ContextT& dev_ctx,
const
DenseTensor
&
y
,
int
axis
)
{
auto
out_meta
=
ElementwiseInferMeta
(
x
.
meta
(),
y
.
meta
(),
axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
())
;
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
pten
::
DenseTensor
dense_out
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
dev_ctx
.
GetPlace
())
,
std
::
move
(
out_meta
)
);
ElementwiseMul
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
&
dense_out
);
return
dense_out
;
}
...
...
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
浏览文件 @
e78eb3f4
...
...
@@ -804,10 +804,9 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
// temp_output should be stored temp_data in output_data space or stored in
// y_data;
pten
::
DDim
tmp_ddim
;
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
y
->
place
());
pten
::
DenseTensor
tmp
=
pten
::
DenseTensor
(
alloc
,
pten
::
DenseTensorMeta
(
y
->
dtype
(),
tmp_ddim
,
y
->
layout
()));
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
y
->
place
()),
pten
::
DenseTensorMeta
(
y
->
dtype
(),
tmp_ddim
,
y
->
layout
()));
auto
x_data
=
x
.
data
<
Tx
>
();
auto
y_data
=
y
->
mutable_data
<
Ty
>
();
...
...
@@ -847,10 +846,8 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
reducer
.
initial
(),
stream
);
// framework::Tensor tmp;
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
x
.
place
());
pten
::
DenseTensor
tmp
=
pten
::
DenseTensor
(
alloc
,
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
x
.
place
())
,
pten
::
DenseTensorMeta
(
pten
::
DataType
::
UINT8
,
paddle
::
framework
::
make_ddim
(
{
static_cast
<
int64_t
>
(
temp_storage_bytes
)}),
...
...
paddle/pten/kernels/hybird/eigen/reduce.h
浏览文件 @
e78eb3f4
...
...
@@ -14,7 +14,7 @@
#pragma once
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
#include "paddle/pten/kernels/hybird/transpose.h"
...
...
@@ -129,9 +129,9 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
)
{
// shuffle the reduced dim to the end
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
input
.
place
());
pten
::
DenseTensor
shuffled_input
=
pten
::
DenseTensor
(
alloc
,
input
.
meta
());
pten
::
DenseTensor
shuffled_input
=
pten
::
DenseTensor
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
input
.
place
()),
input
.
meta
());
GetShuffledInput
<
DeviceContext
,
OutT
>
(
dev_ctx
,
input
,
&
shuffled_input
,
dims
);
...
...
paddle/pten/kernels/hybird/general/reduce_impl.h
浏览文件 @
e78eb3f4
...
...
@@ -53,10 +53,9 @@ void Reduce(const DeviceContext& dev_ctx,
dev_ctx
,
x
,
out
,
dims
,
keep_dim
,
reduce_all
);
}));
}
else
{
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
x
.
place
());
pten
::
DenseTensor
tmp_tensor
=
pten
::
DenseTensor
(
alloc
,
pten
::
DenseTensorMeta
(
out_dtype
,
x
.
dims
(),
x
.
layout
()));
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
x
.
place
()),
pten
::
DenseTensorMeta
(
out_dtype
,
x
.
dims
(),
x
.
layout
()));
// cast x tensor to out_dtype first
PD_VISIT_ALL_TYPES
(
out_dtype
,
"CastKernelImpl"
,
([
&
]
{
...
...
paddle/pten/tests/api/scale_api.h
浏览文件 @
e78eb3f4
...
...
@@ -71,11 +71,10 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
kernel_context
.
EmplaceBackAttr
(
bias_after_scale
);
auto
out_meta
=
pten
::
UnchangedInferMeta
(
dense_x
->
meta
());
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
pten
::
TransToFluidPlace
(
kernel_backend
));
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
allocator
,
out_meta
);
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
pten
::
TransToFluidPlace
(
kernel_backend
)),
std
::
move
(
out_meta
));
kernel_context
.
EmplaceBackOutput
(
dense_out
);
Tensor
out
;
...
...
@@ -238,10 +237,10 @@ Tensor scale_switch_case(const Tensor& x,
auto
dense_x
=
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
x
.
impl
());
auto
out_meta
=
pten
::
UnchangedInferMeta
(
dense_x
->
meta
());
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
pten
::
TransToFluidPlace
(
kernel_backend
))
;
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
allocator
,
out_meta
);
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
pten
::
make_intrusive
<
paddle
::
experimental
::
SharedStorage
>
(
pten
::
TransToFluidPlace
(
kernel_backend
))
,
std
::
move
(
out_meta
)
);
Tensor
out
;
out
.
set_impl
(
dense_out
);
...
...
python/paddle/utils/code_gen/api_gen.py
浏览文件 @
e78eb3f4
...
...
@@ -303,10 +303,10 @@ PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
{
input_tensors
}
{
self
.
gene_infer_meta
(
self
.
args
[
'inputs'
][
'names'
],
self
.
args
[
'attrs'
][
'names'
],
self
.
infer_meta
)
}
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
pten::TransToFluidPlace(kernel_backend));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta));
Tensor out;
out.set_impl(dense_out);
...
...
@@ -345,7 +345,7 @@ def source_include(header_file_path):
#include "paddle/pten/api/lib/api_registry.h"
#include "paddle/pten/api/lib/kernel_declare.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录