Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a85592bc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a85592bc
编写于
9月 26, 2020
作者:
Z
Zhong Hui
提交者:
GitHub
9月 26, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cpplint error for the autmic max/min
fix cpplint error for the autmic max/min
上级
ecfdfc9c
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
20 addition
and
21 deletion
+20
-21
paddle/fluid/operators/math/segment_pooling.cu
paddle/fluid/operators/math/segment_pooling.cu
+8
-9
paddle/fluid/platform/cuda_primitives.h
paddle/fluid/platform/cuda_primitives.h
+12
-12
未找到文件。
paddle/fluid/operators/math/segment_pooling.cu
浏览文件 @
a85592bc
...
...
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include
"paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include
<algorithm>
#include "paddle/fluid/operators/gather.cu.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/segment_pooling.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_launch_param_config.h"
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -100,7 +99,7 @@ __global__ void SegmentOpsKernel(const Index* segment_ids, const T* input,
CUDA_KERNEL_LOOP
(
stripe_index
,
h
.
total_stripe_count
)
{
Index
segment_offset
,
dim_index_base
,
actual_height
;
Index
inner_dim_size
=
h
.
inner_dim_size
;
h
.
calculate
(
stripe_index
,
segment_offset
,
dim_index_base
,
actual_height
);
h
.
calculate
(
stripe_index
,
&
segment_offset
,
&
dim_index_base
,
&
actual_height
);
T
minmax
=
pool
.
initial
();
Index
first_segment_id
=
segment_ids
[
dim_index_base
];
...
...
@@ -154,7 +153,7 @@ __global__ void SegmentIndexGradKernel(const Index* segment_ids, const T* input,
T
*
in_grad
,
Helper
h
)
{
CUDA_KERNEL_LOOP
(
stripe_index
,
h
.
total_stripe_count
)
{
Index
segment_offset
,
dim_index_base
,
actual_height
;
h
.
calculate
(
stripe_index
,
segment_offset
,
dim_index_base
,
actual_height
);
h
.
calculate
(
stripe_index
,
&
segment_offset
,
&
dim_index_base
,
&
actual_height
);
for
(
Index
j
=
0
;
j
<
actual_height
;
j
++
)
{
Index
current_segment_id
=
segment_ids
[
dim_index_base
+
j
];
...
...
@@ -217,11 +216,11 @@ class ArrangeHelper {
total_stripe_count
=
inner_dim_size
*
input_outer_dim_num_stripe
;
}
DEVICE
inline
void
calculate
(
T
stripe_index
,
T
&
segment_offset
,
T
&
dim_index_base
,
T
&
actual_height
)
{
segment_offset
=
stripe_index
%
inner_dim_size
;
dim_index_base
=
stripe_index
/
inner_dim_size
*
DimTileSize
;
actual_height
=
min
(
DimTileSize
,
input_length_size
-
dim_index_base
);
DEVICE
inline
void
calculate
(
T
stripe_index
,
T
*
segment_offset
,
T
*
dim_index_base
,
T
*
actual_height
)
{
*
segment_offset
=
stripe_index
%
inner_dim_size
;
*
dim_index_base
=
stripe_index
/
inner_dim_size
*
DimTileSize
;
*
actual_height
=
min
(
DimTileSize
,
input_length_size
-
*
dim_index_base
);
}
};
...
...
paddle/fluid/platform/cuda_primitives.h
浏览文件 @
a85592bc
...
...
@@ -137,12 +137,12 @@ USE_CUDA_ATOMIC(Max, unsigned int);
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC
(
Max
,
unsigned
long
long
int
);
// NOLINT
#else
CUDA_ATOMIC_WRAPPER
(
Max
,
unsigned
long
long
int
)
{
CUDA_ATOMIC_WRAPPER
(
Max
,
unsigned
long
long
int
)
{
// NOLINT
if
(
*
address
>=
val
)
{
return
;
}
unsigned
long
long
int
old
=
*
address
,
assumed
;
unsigned
long
long
int
old
=
*
address
,
assumed
;
// NOLINT
do
{
assumed
=
old
;
...
...
@@ -169,7 +169,7 @@ CUDA_ATOMIC_WRAPPER(Max, float) {
return
;
}
int
*
const
address_as_i
=
(
int
*
)
address
;
int
*
const
address_as_i
=
reinterpret_cast
<
int
*>
(
address
)
;
int
old
=
*
address_as_i
,
assumed
;
do
{
...
...
@@ -187,9 +187,9 @@ CUDA_ATOMIC_WRAPPER(Max, double) {
return
;
}
unsigned
long
long
int
*
const
address_as_ull
=
(
unsigned
long
long
int
*
)
address
;
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
unsigned
long
long
int
*
const
address_as_ull
=
// NOLINT
reinterpret_cast
<
unsigned
long
long
int
*>
(
address
);
// NOLINT
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
// NOLINT
do
{
assumed
=
old
;
...
...
@@ -209,12 +209,12 @@ USE_CUDA_ATOMIC(Min, unsigned int);
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC
(
Min
,
unsigned
long
long
int
);
// NOLINT
#else
CUDA_ATOMIC_WRAPPER
(
Min
,
unsigned
long
long
int
)
{
CUDA_ATOMIC_WRAPPER
(
Min
,
unsigned
long
long
int
)
{
// NOLINT
if
(
*
address
<=
val
)
{
return
;
}
unsigned
long
long
int
old
=
*
address
,
assumed
;
unsigned
long
long
int
old
=
*
address
,
assumed
;
// NOLINT
do
{
assumed
=
old
;
...
...
@@ -241,7 +241,7 @@ CUDA_ATOMIC_WRAPPER(Min, float) {
return
;
}
int
*
const
address_as_i
=
(
int
*
)
address
;
int
*
const
address_as_i
=
reinterpret_cast
<
int
*>
(
address
)
;
int
old
=
*
address_as_i
,
assumed
;
do
{
...
...
@@ -259,9 +259,9 @@ CUDA_ATOMIC_WRAPPER(Min, double) {
return
;
}
unsigned
long
long
int
*
const
address_as_ull
=
(
unsigned
long
long
int
*
)
address
;
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
unsigned
long
long
int
*
const
address_as_ull
=
// NOLINT
reinterpret_cast
<
unsigned
long
long
int
*>
(
address
);
// NOLINT
unsigned
long
long
int
old
=
*
address_as_ull
,
assumed
;
// NOLINT
do
{
assumed
=
old
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录