Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
5bfbd60f
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 接近 3 年
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5bfbd60f
编写于
2月 13, 2019
作者:
S
ScXfjiang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
correctness
Former-commit-id: d1eba08d1903fceb50bf9e0f77bf4548b2ddedb4
上级
31c8f14d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
35 addition
and
10 deletion
+35
-10
oneflow/core/kernel/top_k_kernel.cpp
oneflow/core/kernel/top_k_kernel.cpp
+28
-5
oneflow/core/kernel/top_k_kernel.cu
oneflow/core/kernel/top_k_kernel.cu
+2
-2
oneflow/core/operator/top_k_op.cpp
oneflow/core/operator/top_k_op.cpp
+5
-3
未找到文件。
oneflow/core/kernel/top_k_kernel.cpp
浏览文件 @
5bfbd60f
...
...
@@ -7,8 +7,27 @@ namespace oneflow {
namespace
{
template
<
typename
T
>
void
ForwardPartDataContent
(
const
T
*
in
,
const
Range
range
,
const
int32_t
instance_size
,
const
int32_t
k
,
const
bool
sorted
,
int32_t
*
fw_buf
,
int32_t
*
out
)
{
void
ForwardPartDataContentTopOne
(
const
T
*
in
,
const
Range
range
,
const
int32_t
instance_size
,
int32_t
*
out
)
{
FOR_RANGE
(
int32_t
,
i
,
range
.
begin
(),
range
.
end
())
{
const
int32_t
offset
=
i
*
instance_size
;
const
T
*
values
=
in
+
offset
;
T
max_val
=
GetMinVal
<
T
>
();
int32_t
max_idx
=
-
1
;
FOR_RANGE
(
int32_t
,
j
,
0
,
instance_size
)
{
if
(
values
[
j
]
>
max_val
)
{
max_val
=
values
[
j
];
max_idx
=
j
;
}
}
out
[
i
]
=
max_idx
;
}
}
template
<
typename
T
>
void
ForwardPartDataContentTopK
(
const
T
*
in
,
const
Range
range
,
const
int32_t
instance_size
,
const
int32_t
k
,
const
bool
sorted
,
int32_t
*
fw_buf
,
int32_t
*
out
)
{
CHECK_NOTNULL
(
fw_buf
);
FOR_RANGE
(
int32_t
,
i
,
range
.
begin
(),
range
.
end
())
{
const
int32_t
offset
=
i
*
instance_size
;
int32_t
*
indices
=
fw_buf
+
offset
;
...
...
@@ -24,7 +43,7 @@ void ForwardPartDataContent(const T* in, const Range range, const int32_t instan
}
};
std
::
nth_element
(
indices
,
indices
+
k
,
indices
+
instance_size
,
comp
);
if
(
k
>
1
&&
sorted
)
{
std
::
sort
(
indices
,
indices
+
k
,
comp
);
}
if
(
sorted
)
{
std
::
sort
(
indices
,
indices
+
k
,
comp
);
}
std
::
copy
(
indices
,
indices
+
k
,
out
+
i
*
k
);
}
}
...
...
@@ -43,7 +62,11 @@ struct TopKKernelUtil<DeviceType::kCPU, T> {
FOR_RANGE
(
int32_t
,
part_id
,
0
,
part_num
)
{
Range
range
=
bs
.
At
(
part_id
);
Global
<
ThreadMgr
>::
Get
()
->
compute_thread_pool
()
->
AddWork
([
=
,
&
bc
]()
{
ForwardPartDataContent
(
in
,
range
,
instance_size
,
k
,
sorted
,
fw_buf
,
out
);
if
(
k
==
1
)
{
ForwardPartDataContentTopOne
(
in
,
range
,
instance_size
,
out
);
}
else
{
ForwardPartDataContentTopK
(
in
,
range
,
instance_size
,
k
,
sorted
,
fw_buf
,
out
);
}
bc
.
Decrease
();
});
}
...
...
@@ -62,7 +85,7 @@ void TopKKernel<device_type, T>::ForwardDataContent(
const
int32_t
instance_size
=
static_cast
<
int32_t
>
(
in_blob
->
shape
().
dim_vec
().
back
());
const
int32_t
instance_num
=
static_cast
<
int32_t
>
(
in_blob
->
shape
().
elem_cnt
()
/
instance_size
);
const
T
*
in
=
in_blob
->
dptr
<
T
>
();
int32_t
*
fw_buf
=
fw_buf_blob
->
mut_dptr
<
int32_t
>
()
;
int32_t
*
fw_buf
=
fw_buf_blob
?
fw_buf_blob
->
mut_dptr
<
int32_t
>
()
:
nullptr
;
int32_t
*
out
=
out_blob
->
mut_dptr
<
int32_t
>
();
const
auto
&
conf
=
this
->
op_conf
().
top_k_conf
();
TopKKernelUtil
<
device_type
,
T
>::
Forward
(
ctx
.
device_ctx
,
in
,
instance_num
,
instance_size
,
conf
.
k
(),
...
...
oneflow/core/kernel/top_k_kernel.cu
浏览文件 @
5bfbd60f
...
...
@@ -10,7 +10,7 @@ namespace oneflow {
template
<
typename
T
>
__global__
void
ForwardGpu
(
const
T
*
in
,
const
int32_t
instance_num
,
const
int32_t
instance_size
,
const
int32_t
k
,
const
bool
sorted
,
int32_t
*
fw_buf
,
int32_t
*
out
)
{
int32_t
*
out
)
{
CUDA_1D_KERNEL_LOOP
(
i
,
instance_num
)
{
T
max_val
=
in
[
i
*
instance_size
];
int32_t
max_idx
=
0
;
...
...
@@ -33,7 +33,7 @@ struct TopKKernelUtil<DeviceType::kGPU, T> {
// GPU version top_k op only support "k == 1" for now
CHECK_EQ
(
k
,
1
);
ForwardGpu
<<<
BlocksNum4ThreadsNum
(
instance_num
),
kCudaThreadsNumPerBlock
,
0
,
ctx
->
cuda_stream
()
>>>
(
in
,
instance_num
,
instance_size
,
k
,
sorted
,
fw_buf
,
out
);
ctx
->
cuda_stream
()
>>>
(
in
,
instance_num
,
instance_size
,
out
);
}
};
...
...
oneflow/core/operator/top_k_op.cpp
浏览文件 @
5bfbd60f
...
...
@@ -18,9 +18,11 @@ void TopKOp::InferBlobDescs(std::function<BlobDesc*(const std::string&)> GetBlob
CHECK_GE
(
conf
.
k
(),
1
);
CHECK_LE
(
conf
.
k
(),
in
->
shape
().
dim_vec
().
back
());
// fw_buf
BlobDesc
*
fw_buf
=
GetBlobDesc4BnInOp
(
"fw_buf"
);
fw_buf
->
mut_shape
()
=
Shape
({
in
->
shape
()});
fw_buf
->
set_data_type
(
DataType
::
kInt32
);
if
(
conf
.
k
()
>
1
)
{
BlobDesc
*
fw_buf
=
GetBlobDesc4BnInOp
(
"fw_buf"
);
fw_buf
->
mut_shape
()
=
Shape
({
in
->
shape
()});
fw_buf
->
set_data_type
(
DataType
::
kInt32
);
}
// out
BlobDesc
*
out
=
GetBlobDesc4BnInOp
(
"out"
);
*
out
=
*
in
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录