Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
OneFlow-Benchmark
提交
1f53172c
O
OneFlow-Benchmark
项目概览
Oneflow-Inc
/
OneFlow-Benchmark
上一次同步 2 年多
通知
1
Star
92
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
OneFlow-Benchmark
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1f53172c
编写于
2月 08, 2020
作者:
S
ShawnXuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix
上级
28556b9c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
26 addition
and
19 deletion
+26
-19
cnn_benchmark/dali.py
cnn_benchmark/dali.py
+6
-4
cnn_benchmark/of_cnn_train_val.py
cnn_benchmark/of_cnn_train_val.py
+13
-8
cnn_benchmark/resnet_model.py
cnn_benchmark/resnet_model.py
+1
-1
run.sh
run.sh
+2
-2
test.sh
test.sh
+4
-4
未找到文件。
cnn_benchmark/dali.py
浏览文件 @
1f53172c
...
...
@@ -14,7 +14,7 @@
import
numpy
as
np
import
time
import
logging
#
import logging
import
warnings
from
nvidia
import
dali
from
nvidia.dali.pipeline
import
Pipeline
...
...
@@ -68,7 +68,8 @@ class HybridTrainPipe(Pipeline):
self
.
resize
=
ops
.
RandomResizedCrop
(
device
=
dali_resize_device
,
size
=
crop_shape
)
self
.
cmnp
=
ops
.
CropMirrorNormalize
(
device
=
dali_resize_device
,
#"gpu",
#self.cmnp = ops.CropMirrorNormalize(device=dali_resize_device, #"gpu",
self
.
cmnp
=
ops
.
CropMirrorNormalize
(
device
=
"gpu"
,
output_dtype
=
types
.
FLOAT16
if
dtype
==
'float16'
else
types
.
FLOAT
,
output_layout
=
output_layout
,
crop
=
crop_shape
,
pad_output
=
pad_output
,
image_type
=
types
.
RGB
,
mean
=
args
.
rgb_mean
,
std
=
args
.
rgb_std
)
...
...
@@ -103,7 +104,8 @@ class HybridValPipe(Pipeline):
host_memory_padding
=
nvjpeg_padding
)
print
(
dali_device
)
self
.
resize
=
ops
.
Resize
(
device
=
dali_device
,
resize_shorter
=
resize_shp
)
if
resize_shp
else
None
self
.
cmnp
=
ops
.
CropMirrorNormalize
(
device
=
dali_device
,
#"gpu",
#self.cmnp = ops.CropMirrorNormalize(device=dali_device,#"gpu",
self
.
cmnp
=
ops
.
CropMirrorNormalize
(
device
=
"gpu"
,
output_dtype
=
types
.
FLOAT16
if
dtype
==
'float16'
else
types
.
FLOAT
,
output_layout
=
output_layout
,
crop
=
crop_shape
,
pad_output
=
pad_output
,
image_type
=
types
.
RGB
,
mean
=
args
.
rgb_mean
,
std
=
args
.
rgb_std
)
...
...
@@ -274,7 +276,7 @@ class DALIGenericIterator(object):
with
p
.
_check_api_type_scope
(
types
.
PipelineAPIType
.
ITERATOR
):
p
.
schedule_run
()
else
:
logging
.
warning
(
"DALI iterator does not support resetting while epoch is not finished. Ignoring..."
)
print
(
"DALI iterator does not support resetting while epoch is not finished. Ignoring..."
)
def
get_rec_iter
(
args
,
dali_cpu
=
False
,
todo
=
True
):
...
...
cnn_benchmark/of_cnn_train_val.py
浏览文件 @
1f53172c
...
...
@@ -6,7 +6,6 @@ import os
import
time
import
math
import
numpy
as
np
import
logging
import
oneflow
as
flow
...
...
@@ -170,25 +169,31 @@ def main():
train_data_iter
,
val_data_iter
=
get_rec_iter
(
args
,
True
)
timer
.
start
()
for
epoch
in
range
(
args
.
num_epochs
):
print
(
'Starting epoch {}'
.
format
(
epoch
))
tic
=
time
.
time
()
print
(
'Starting epoch {} at {:.2f}'
.
format
(
epoch
,
tic
))
train_data_iter
.
reset
()
for
i
,
batches
in
enumerate
(
train_data_iter
):
assert
len
(
batches
)
==
1
images
,
labels
=
batches
[
0
]
TrainNet
(
images
,
labels
.
astype
(
np
.
int32
)).
async_get
(
train_callback
(
epoch
,
i
))
if
i
>
30
:
#debug
break
break
print
(
time
.
time
()
-
tic
)
#
if i > 30:#debug
#
break
#
break
print
(
'epoch {} training time: {:.2f}'
.
format
(
epoch
,
time
.
time
()
-
tic
)
)
if
args
.
data_val
:
tic
=
time
.
time
()
val_data_iter
.
reset
()
for
i
,
batches
in
enumerate
(
val_data_iter
):
assert
len
(
batches
)
==
1
images
,
labels
=
batches
[
0
]
InferenceNet
(
images
,
labels
.
astype
(
np
.
int32
)).
async_get
(
predict_callback
(
epoch
,
i
))
print
(
time
.
time
()
-
tic
)
#InferenceNet(images, labels.astype(np.int32)).async_get(predict_callback(epoch, i))
acc_acc
(
i
,
InferenceNet
(
images
,
labels
.
astype
(
np
.
int32
)).
get
())
assert
main
.
total
>
0
top1_accuracy
=
main
.
correct
/
main
.
total
summary
.
scalar
(
'top1_accuracy'
,
top1_accuracy
,
epoch
)
print
(
"epoch {}, top 1 accuracy: {:.6f}, val_time: {:.2f}"
.
format
(
epoch
,
top1_accuracy
,
time
.
time
()
-
tic
))
snapshot
.
save
(
'epoch_{}'
.
format
(
epoch
+
1
))
...
...
cnn_benchmark/resnet_model.py
浏览文件 @
1f53172c
...
...
@@ -39,7 +39,7 @@ def _batch_norm(inputs, name=None, trainable=True):
inputs
=
inputs
,
axis
=
1
,
momentum
=
0.9
,
#97,
epsilon
=
1e05
,
#1.001e-5,
epsilon
=
1e
-
05
,
#1.001e-5,
center
=
True
,
scale
=
True
,
trainable
=
trainable
,
...
...
run.sh
浏览文件 @
1f53172c
...
...
@@ -12,9 +12,9 @@ DATA_ROOT=/dataset/imagenet-mxnet
--optimizer
=
"momentum-cosine-decay"
\
--weight_l2
=
3.0517578125e-05
\
--learning_rate
=
0.256
\
--loss_print_every_n_iter
=
1
0
\
--loss_print_every_n_iter
=
2
0
\
--batch_size_per_device
=
64
\
--val_batch_size_per_device
=
1
00
\
--val_batch_size_per_device
=
1
25
\
--model
=
"resnet50"
#--weight_l2=3.0517578125e-05 \
#--num_examples=1024 \
...
...
test.sh
浏览文件 @
1f53172c
...
...
@@ -3,10 +3,10 @@ rm -rf core.*
#DATA_ROOT=/mnt/13_nfs/xuan/ImageNet
DATA_ROOT
=
/dataset/imagenet-mxnet
python cnn_benchmark/dali.py
\
--data_train
=
$DATA_ROOT
/
mxnet/
train.rec
\
--data_train_idx
=
$DATA_ROOT
/
mxnet/
train.idx
\
--data_val
=
$DATA_ROOT
/
mxnet/
val.rec
\
--data_val_idx
=
$DATA_ROOT
/
mxnet/
val.idx
\
--data_train
=
$DATA_ROOT
/train.rec
\
--data_train_idx
=
$DATA_ROOT
/train.idx
\
--data_val
=
$DATA_ROOT
/val.rec
\
--data_val_idx
=
$DATA_ROOT
/val.idx
\
--val_batch_size_per_device
=
20
\
--gpu_num_per_node
=
4
\
--num_examples
=
1024
\
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录