Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
e3066df5
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 2 年多
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e3066df5
编写于
7月 17, 2017
作者:
K
kingsmad
提交者:
chengtbf
7月 17, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix assert_float_eq in blobcmp && issues in Conv kernel (#207)
* fix assert_float_eq in blobcmp * fix cuda_stream
上级
16be8e81
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
8 addition
and
5 deletion
+8
-5
oneflow/core/kernel/convolution_kernel.cu
oneflow/core/kernel/convolution_kernel.cu
+4
-2
oneflow/core/kernel/kernel_test_common.cpp
oneflow/core/kernel/kernel_test_common.cpp
+1
-1
oneflow/core/kernel/rmsprop_model_update_kernel.cpp
oneflow/core/kernel/rmsprop_model_update_kernel.cpp
+2
-1
oneflow/core/kernel/rmsprop_model_update_kernel.cu
oneflow/core/kernel/rmsprop_model_update_kernel.cu
+1
-1
未找到文件。
oneflow/core/kernel/convolution_kernel.cu
浏览文件 @
e3066df5
...
...
@@ -102,7 +102,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
(
width
+
2
*
pad_w
-
(
dilation_w
*
(
kernel_w
-
1
)
+
1
))
/
stride_w
+
1
;
int
num_kernels
=
channels
*
height_col
*
width_col
;
Im2ColGpuKernel
<
FloatingPointType
>
<<<
BlocksNum4ThreadsNum
(
num_kernels
),
kCudaThreadsNumPerBlock
>>>
(
<<<
BlocksNum4ThreadsNum
(
num_kernels
),
kCudaThreadsNumPerBlock
,
0
,
ctx
.
device_ctx
->
cuda_stream
()
>>>
(
num_kernels
,
data_im
,
height
,
width
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
height_col
,
width_col
,
data_col
);
...
...
@@ -122,7 +123,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
Col2ImGpuKernel
<
FloatingPointType
>
<<<
BlocksNum4ThreadsNum
(
num_kernels
),
kCudaThreadsNumPerBlock
>>>
(
<<<
BlocksNum4ThreadsNum
(
num_kernels
),
kCudaThreadsNumPerBlock
,
0
,
ctx
.
device_ctx
->
cuda_stream
()
>>>
(
num_kernels
,
data_col
,
height
,
width
,
channels
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
height_col
,
width_col
,
data_im
);
...
...
oneflow/core/kernel/kernel_test_common.cpp
浏览文件 @
e3066df5
...
...
@@ -62,7 +62,7 @@ class KernelTestCommon<DeviceType::kCPU, FloatingPointType> final {
size_t
dptr_size
=
lhs
->
shape
().
elem_cnt
();
for
(
size_t
i
=
0
;
i
<
dptr_size
;
++
i
)
{
ASSERT_
NEAR
(
dptr_lhs
[
i
],
dptr_rhs
[
i
],
0.0000001
);
ASSERT_
FLOAT_EQ
(
dptr_lhs
[
i
],
dptr_rhs
[
i
]
);
}
}
...
...
oneflow/core/kernel/rmsprop_model_update_kernel.cpp
浏览文件 @
e3066df5
...
...
@@ -57,7 +57,8 @@ class RMSPropMdUpdateKernelUtil<DeviceType::kCPU, FloatingPointType> final {
const
FloatingPointType
alpha
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
SendWork
([
=
]()
{
for
(
int64_t
i
=
0
;
i
<
n
;
++
i
)
{
model
[
i
]
-=
alpha
*
model_diff
[
i
]
/
(
std
::
sqrt
(
mean_square
[
i
])
+
epsilon
);
model
[
i
]
-=
alpha
*
model_diff
[
i
]
/
(
std
::
sqrt
(
mean_square
[
i
])
+
epsilon
);
}
});
}
...
...
oneflow/core/kernel/rmsprop_model_update_kernel.cu
浏览文件 @
e3066df5
...
...
@@ -25,7 +25,7 @@ __global__ void UpdateModelGpu(const int64_t n, FloatingPointType* model,
const
FloatingPointType
epsilon
,
const
FloatingPointType
alpha
)
{
CUDA_1D_KERNEL_LOOP
(
i
,
n
)
{
model
[
i
]
-=
alpha
*
model_diff
[
i
]
/
(
std
::
sqrt
(
mean_square
[
i
])
+
epsilon
);
model
[
i
]
-=
alpha
*
model_diff
[
i
]
/
(
std
::
sqrt
(
mean_square
[
i
])
+
epsilon
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录