Unverified commit a5021e89
Authored Sep 14, 2022 by Leo Chen
Committed by GitHub on Sep 14, 2022
add check_memory_continue kernel (#45999)
Parent e8809d99
Showing 2 changed files with 141 additions and 0 deletions (+141 −0)
paddle/phi/kernels/check_memory_continue_kernel.cc  +101 −0
paddle/phi/kernels/check_memory_continue_kernel.h   +40 −0
paddle/phi/kernels/check_memory_continue_kernel.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/check_memory_continue_kernel.h"
#include <sstream>
#include <vector>
#include "glog/logging.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/fluid/platform/device_memory_aligment.h"
namespace phi {

template <typename T, typename Context>
void CheckMemoryContinueKernel(const Context& dev_ctx,
                               const std::vector<const DenseTensor*>& input,
                               DenseTensor* output,
                               std::vector<DenseTensor*> xout) {
  int64_t size_of_dtype = sizeof(T);
  auto dtype = input.at(0)->dtype();
  int64_t numel = 0;
  // check address
  for (size_t i = 1; i < input.size(); ++i) {
    PADDLE_ENFORCE_EQ(
        dtype,
        input.at(i)->dtype(),
        errors::InvalidArgument(
            "The DataType of input tensors of fake_coalesce should be "
            "consistent, current dtype is: %s, but the previous dtype is %s",
            dtype,
            input.at(i)->dtype()));
    const void* cur_address = input.at(i - 1)->data();
    int64_t len = input.at(i - 1)->numel();
    auto offset =
        paddle::platform::Alignment(len * size_of_dtype, dev_ctx.GetPlace());
    void* infer_next_address = reinterpret_cast<void*>(
        reinterpret_cast<uintptr_t>(cur_address) + offset);
    const void* next_address = input.at(i)->data();
    numel += offset;
    VLOG(10) << ::paddle::string::Sprintf(
        "Input[%d] address: 0X%02x, Input[%d] address: 0X%02x, Infer "
        "input[%d] address: 0X%02x, offset: %d.",
        i - 1,
        cur_address,
        i,
        next_address,
        i,
        infer_next_address,
        offset);
    PADDLE_ENFORCE_EQ(
        infer_next_address,
        next_address,
        errors::InvalidArgument(
            "The infered address of the next tensor should be equal to the "
            "real address of the next tensor. But got infered address is %p "
            "and real address is %p.",
            infer_next_address,
            next_address));
  }
  numel += paddle::platform::Alignment(
      (*input.rbegin())->numel() * size_of_dtype, dev_ctx.GetPlace());
  // reset holder, do inplace
  output->ShareBufferWith(*input.at(0));
  output->Resize({numel / size_of_dtype});
  VLOG(4) << "addr:" << output->data<T>();
}

}  // namespace phi

PD_REGISTER_KERNEL(check_memory_continue,
                   CPU,
                   ALL_LAYOUT,
                   phi::CheckMemoryContinueKernel,
                   int,
                   float,
                   double) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(check_memory_continue,
                   GPU,
                   ALL_LAYOUT,
                   phi::CheckMemoryContinueKernel,
                   phi::dtype::float16,
                   int,
                   float,
                   double) {}
#endif
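To make the contiguity invariant concrete, the following standalone C++ sketch (not part of the commit; AlignUp, the fixed 64-byte boundary, and the pool layout are illustrative assumptions standing in for paddle::platform::Alignment and the real fused buffer) carves chunks out of one aligned backing buffer and applies the same inferred-address check that the kernel performs, then reports the flattened element count that corresponds to the kernel's output->Resize.

// Minimal sketch of the check performed by CheckMemoryContinueKernel
// (illustrative only; AlignUp is a hypothetical stand-in for the
// place-dependent paddle::platform::Alignment helper).
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Round a byte count up to a fixed 64-byte boundary.
static std::size_t AlignUp(std::size_t bytes, std::size_t alignment = 64) {
  return (bytes + alignment - 1) / alignment * alignment;
}

int main() {
  // One contiguous backing buffer, as a coalesce/fuse pass would produce.
  alignas(64) static float pool[1024];
  const std::vector<std::size_t> numels = {10, 30, 7};  // elements per chunk

  // Carve chunks at aligned offsets, mimicking the fused tensor layout.
  std::vector<const void*> addrs;
  std::size_t offset_bytes = 0;
  for (std::size_t n : numels) {
    addrs.push_back(reinterpret_cast<const char*>(pool) + offset_bytes);
    offset_bytes += AlignUp(n * sizeof(float));
  }

  // The kernel's invariant: the address of chunk i inferred from chunk i-1
  // (previous address plus its aligned byte size) must equal the real address.
  for (std::size_t i = 1; i < addrs.size(); ++i) {
    const void* inferred = reinterpret_cast<const char*>(addrs[i - 1]) +
                           AlignUp(numels[i - 1] * sizeof(float));
    assert(inferred == addrs[i] && "tensors are not memory-contiguous");
  }

  // Analogue of output->Resize({numel / size_of_dtype}): the flattened view
  // covers the aligned element count, not just the sum of the numels.
  std::printf("flattened element count: %zu\n", offset_bytes / sizeof(float));
  return 0;
}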
paddle/phi/kernels/check_memory_continue_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
// WHY add this op?
// This op is used to convert fused_all_reduce_op_handle in Graph to Program,
// i.e. fused_all_reduce_op_handle = check_memory_continue + c_allreduce_sum.
// There are two reasons that check_memory_continue is added:
// 1. c_allreduce_sum takes a single tensor as input, while
//    fused_all_reduce_op_handle takes a tensor array as input, so we need an
//    op to convert the tensor array into a single tensor.
// 2. fused_all_reduce_op_handle has a premise that all tensors' addresses are
//    contiguous, so we need an op to do the check.
// See details in fused_all_reduce_op_handle.cc
template <typename T, typename Context>
void CheckMemoryContinueKernel(const Context& dev_ctx,
                               const std::vector<const DenseTensor*>& input,
                               DenseTensor* output,
                               std::vector<DenseTensor*> xout);

}  // namespace phi