Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
3231c4ab
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3231c4ab
编写于
6月 24, 2020
作者:
L
lizhenyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add wide&deep standalone training script for gpu in model zoo
上级
c74b1685
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
60 additions
and
14 deletions
+60
-14
model_zoo/wide_and_deep/README.md
model_zoo/wide_and_deep/README.md
+3
-0
model_zoo/wide_and_deep/eval.py
model_zoo/wide_and_deep/eval.py
+4
-3
model_zoo/wide_and_deep/script/run_multigpu_train.sh
model_zoo/wide_and_deep/script/run_multigpu_train.sh
+2
-1
model_zoo/wide_and_deep/script/run_standalone_train_for_gpu.sh
..._zoo/wide_and_deep/script/run_standalone_train_for_gpu.sh
+27
-0
model_zoo/wide_and_deep/train.py
model_zoo/wide_and_deep/train.py
+5
-4
model_zoo/wide_and_deep/train_and_eval.py
model_zoo/wide_and_deep/train_and_eval.py
+7
-4
model_zoo/wide_and_deep/train_and_eval_auto_parallel.py
model_zoo/wide_and_deep/train_and_eval_auto_parallel.py
+3
-0
model_zoo/wide_and_deep/train_and_eval_distribute.py
model_zoo/wide_and_deep/train_and_eval_distribute.py
+9
-2
未找到文件。
model_zoo/wide_and_deep/README.md
浏览文件 @
3231c4ab
...
...
@@ -37,6 +37,7 @@ To train and evaluate the model, command as follows:
python train_and_eval.py
```
Arguments:
*
`--device_target`
: Device where the code will be implemented (Default: Ascend).
*
`--data_path`
: This should be set to the same directory given to the data_download's data_dir argument.
*
`--epochs`
: Total train epochs.
*
`--batch_size`
: Training batch size.
...
...
@@ -57,6 +58,7 @@ To train the model in one device, command as follows:
python train.py
```
Arguments:
*
`--device_target`
: Device where the code will be implemented (Default: Ascend).
*
`--data_path`
: This should be set to the same directory given to the data_download's data_dir argument.
*
`--epochs`
: Total train epochs.
*
`--batch_size`
: Training batch size.
...
...
@@ -87,6 +89,7 @@ To evaluate the model, command as follows:
python eval.py
```
Arguments:
*
`--device_target`
: Device where the code will be implemented (Default: Ascend).
*
`--data_path`
: This should be set to the same directory given to the data_download's data_dir argument.
*
`--epochs`
: Total train epochs.
*
`--batch_size`
: Training batch size.
...
...
model_zoo/wide_and_deep/eval.py
浏览文件 @
3231c4ab
...
...
@@ -26,11 +26,11 @@ from src.datasets import create_dataset
from
src.metrics
import
AUCMetric
from
src.config
import
WideDeepConfig
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Davinci"
,
save_graphs
=
True
)
def
get_WideDeep_net
(
config
):
"""
Get network of wide&deep model.
"""
WideDeep_net
=
WideDeepModel
(
config
)
loss_net
=
NetWithLossClass
(
WideDeep_net
,
config
)
...
...
@@ -91,4 +91,5 @@ if __name__ == "__main__":
widedeep_config
=
WideDeepConfig
()
widedeep_config
.
argparse_init
()
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
widedeep_config
.
device_target
)
test_eval
(
widedeep_config
)
model_zoo/wide_and_deep/script/run_multigpu_train.sh
浏览文件 @
3231c4ab
...
...
@@ -14,7 +14,7 @@
# limitations under the License.
# ============================================================================
# bash run_multigpu_train.sh
# bash run_multigpu_train.sh
RANK_SIZE EPOCH_SIZE DATASET
script_self
=
$(
readlink
-f
"
$0
"
)
self_path
=
$(
dirname
"
${
script_self
}
"
)
RANK_SIZE
=
$1
...
...
@@ -25,4 +25,5 @@ mpirun --allow-run-as-root -n $RANK_SIZE \
python
-s
${
self_path
}
/../train_and_eval_distribute.py
\
--device_target
=
"GPU"
\
--data_path
=
$DATASET
\
--batch_size
=
8000
\
--epochs
=
$EPOCH_SIZE
>
log.txt 2>&1 &
model_zoo/wide_and_deep/script/run_standalone_train_for_gpu.sh
0 → 100644
浏览文件 @
3231c4ab
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# bash run_standalone_train_for_gpu.sh EPOCH_SIZE DATASET
#
# Launches standalone (single-device) wide&deep training on GPU.
#   EPOCH_SIZE - total number of training epochs
#   DATASET    - path to the training dataset directory

# Resolve the directory this script lives in so train_and_eval.py is
# found regardless of the caller's working directory.
script_self=$(readlink -f "$0")
self_path=$(dirname "${script_self}")
EPOCH_SIZE=$1
DATASET=$2

# Quote all expansions: unquoted $DATASET / ${self_path} would undergo word
# splitting and globbing if the path contains spaces or wildcard characters.
# Output goes to log.txt; the job runs in the background.
python -s "${self_path}/../train_and_eval.py" \
    --device_target="GPU" \
    --data_path="$DATASET" \
    --batch_size=16000 \
    --epochs="$EPOCH_SIZE" > log.txt 2>&1 &
model_zoo/wide_and_deep/train.py
浏览文件 @
3231c4ab
...
...
@@ -15,16 +15,16 @@
import
os
from
mindspore
import
Model
,
context
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
,
TimeMonitor
from
src.wide_and_deep
import
PredictWithSigmoid
,
TrainStepWrap
,
NetWithLossClass
,
WideDeepModel
from
src.callbacks
import
LossCallBack
from
src.datasets
import
create_dataset
from
src.config
import
WideDeepConfig
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
,
save_graphs
=
True
)
def
get_WideDeep_net
(
configure
):
"""
Get network of wide&deep model.
"""
WideDeep_net
=
WideDeepModel
(
configure
)
loss_net
=
NetWithLossClass
(
WideDeep_net
,
configure
)
...
...
@@ -72,7 +72,7 @@ def test_train(configure):
model
=
Model
(
train_net
)
callback
=
LossCallBack
(
config
=
configure
)
ckptconfig
=
CheckpointConfig
(
save_checkpoint_steps
=
1
,
ckptconfig
=
CheckpointConfig
(
save_checkpoint_steps
=
ds_train
.
get_dataset_size
()
,
keep_checkpoint_max
=
5
)
ckpoint_cb
=
ModelCheckpoint
(
prefix
=
'widedeep_train'
,
directory
=
configure
.
ckpt_path
,
config
=
ckptconfig
)
model
.
train
(
epochs
,
ds_train
,
callbacks
=
[
TimeMonitor
(
ds_train
.
get_dataset_size
()),
callback
,
ckpoint_cb
])
...
...
@@ -82,4 +82,5 @@ if __name__ == "__main__":
config
=
WideDeepConfig
()
config
.
argparse_init
()
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
config
.
device_target
)
test_train
(
config
)
model_zoo/wide_and_deep/train_and_eval.py
浏览文件 @
3231c4ab
...
...
@@ -15,7 +15,7 @@
import
os
from
mindspore
import
Model
,
context
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
,
TimeMonitor
from
src.wide_and_deep
import
PredictWithSigmoid
,
TrainStepWrap
,
NetWithLossClass
,
WideDeepModel
from
src.callbacks
import
LossCallBack
,
EvalCallBack
...
...
@@ -23,10 +23,11 @@ from src.datasets import create_dataset
from
src.metrics
import
AUCMetric
from
src.config
import
WideDeepConfig
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Davinci"
)
def
get_WideDeep_net
(
config
):
"""
Get network of wide&deep model.
"""
WideDeep_net
=
WideDeepModel
(
config
)
loss_net
=
NetWithLossClass
(
WideDeep_net
,
config
)
...
...
@@ -87,11 +88,13 @@ def test_train_eval(config):
out
=
model
.
eval
(
ds_eval
)
print
(
"====="
*
5
+
"model.eval() initialized: {}"
.
format
(
out
))
model
.
train
(
epochs
,
ds_train
,
callbacks
=
[
eval_callback
,
callback
,
ckpoint_cb
])
model
.
train
(
epochs
,
ds_train
,
callbacks
=
[
TimeMonitor
(
ds_train
.
get_dataset_size
()),
eval_callback
,
callback
,
ckpoint_cb
])
if
__name__
==
"__main__"
:
wide_deep_config
=
WideDeepConfig
()
wide_deep_config
.
argparse_init
()
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
wide_deep_config
.
device_target
)
test_train_eval
(
wide_deep_config
)
model_zoo/wide_and_deep/train_and_eval_auto_parallel.py
浏览文件 @
3231c4ab
...
...
@@ -40,6 +40,9 @@ init()
def
get_WideDeep_net
(
config
):
"""
Get network of wide&deep model.
"""
WideDeep_net
=
WideDeepModel
(
config
)
loss_net
=
NetWithLossClass
(
WideDeep_net
,
config
)
loss_net
=
VirtualDatasetCellTriple
(
loss_net
)
...
...
model_zoo/wide_and_deep/train_and_eval_distribute.py
浏览文件 @
3231c4ab
...
...
@@ -33,6 +33,9 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def
get_WideDeep_net
(
config
):
"""
Get network of wide&deep model.
"""
WideDeep_net
=
WideDeepModel
(
config
)
loss_net
=
NetWithLossClass
(
WideDeep_net
,
config
)
train_net
=
TrainStepWrap
(
loss_net
)
...
...
@@ -90,8 +93,12 @@ def train_and_eval(config):
callback
=
LossCallBack
(
config
=
config
)
ckptconfig
=
CheckpointConfig
(
save_checkpoint_steps
=
ds_train
.
get_dataset_size
(),
keep_checkpoint_max
=
5
)
ckpoint_cb
=
ModelCheckpoint
(
prefix
=
'widedeep_train'
,
directory
=
config
.
ckpt_path
,
config
=
ckptconfig
)
if
config
.
device_target
==
"Ascend"
:
ckpoint_cb
=
ModelCheckpoint
(
prefix
=
'widedeep_train'
,
directory
=
config
.
ckpt_path
,
config
=
ckptconfig
)
elif
config
.
device_target
==
"GPU"
:
ckpoint_cb
=
ModelCheckpoint
(
prefix
=
'widedeep_train_'
+
str
(
get_rank
()),
directory
=
config
.
ckpt_path
,
config
=
ckptconfig
)
out
=
model
.
eval
(
ds_eval
)
print
(
"====="
*
5
+
"model.eval() initialized: {}"
.
format
(
out
))
model
.
train
(
epochs
,
ds_train
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录