Commit 7a364ee5
Authored Aug 26, 2020 by ougongchang
Fix the mindinsight notebook tutorial
Parent: aa0cb598

Showing 3 changed files with 228 additions and 452 deletions
tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb (+64, -79)
tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb (+91, -91)
tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb (+73, -282)
tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb
...
...
@@ -74,51 +74,55 @@
"metadata": {},
"outputs": [],
"source": [
"import urllib.request \n",
"from urllib.parse import urlparse\n",
"import gzip \n",
"import os\n",
"import gzip\n",
"import urllib.request\n",
"from urllib.parse import urlparse\n",
"\n",
"\n",
"def unzip_file(gzip_path):\n",
" \"\"\"unzip dataset file\n",
" \"\"\"\n",
" Unzip a given gzip file.\n",
"\n",
" Args:\n",
" gzip_path
: dataset
file path\n",
" gzip_path
(str): The gzip
file path\n",
" \"\"\"\n",
" open_file = open(gzip_path.replace('.gz',''), 'wb')\n",
" open_file = open(gzip_path.replace('.gz',
''), 'wb')\n",
" gz_file = gzip.GzipFile(gzip_path)\n",
" open_file.write(gz_file.read())\n",
" gz_file.close()\n",
" \n",
"\n",
"\n",
"def download_dataset():\n",
" \"\"\"Download the dataset from http://yann.lecun.com/exdb/mnist/.\"\"\"\n",
" print(\"******Downloading the MNIST dataset******\")\n",
" train_path = \"./MNIST_Data/train/\"
\n",
" train_path = \"./MNIST_Data/train/\"\n",
" test_path = \"./MNIST_Data/test/\"\n",
" train_path_check = os.path.exists(train_path)\n",
" test_path_check = os.path.exists(test_path)\n",
" if
train_path_check == False and test_path_check == False
:\n",
" if
not train_path_check and not test_path_check
:\n",
" os.makedirs(train_path)\n",
" os.makedirs(test_path)\n",
" train_url = {\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\", \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\"}\n",
" test_url = {\"http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\", \"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\"}\n",
" \n",
" train_url = {\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\",\n",
" \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\"}\n",
" test_url = {\"http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\",\n",
" \"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\"}\n",
"\n",
" for url in train_url:\n",
" url_parse = urlparse(url)\n",
" \"\"\"split the file name from url\"\"\"\n",
" file_name = os.path.join(train_path,url_parse.path.split('/')[-1])\n",
" if not os.path.exists(file_name.replace('.gz', '')):\n",
" file = urllib.request.urlretrieve(url, file_name)\n",
" unzipfile(file_name)\n",
" os.remove(file_name)\n",
" \n",
" # split the file name from url\n",
" file_name = os.path.join(train_path, url_parse.path.split('/')[-1])\n",
" if not os.path.exists(file_name.replace('.gz', '')) and not os.path.exists(file_name):\n",
" urllib.request.urlretrieve(url, file_name)\n",
" unzip_file(file_name)\n",
"\n",
" for url in test_url:\n",
" url_parse = urlparse(url)\n",
" \"\"\"split the file name from url\"\"\"\n",
" file_name = os.path.join(test_path,url_parse.path.split('/')[-1])\n",
" if not os.path.exists(file_name.replace('.gz', '')):\n",
" file = urllib.request.urlretrieve(url, file_name)\n",
" unzipfile(file_name)\n",
" os.remove(file_name)\n",
" # split the file name from url\n",
" file_name = os.path.join(test_path, url_parse.path.split('/')[-1])\n",
" if not os.path.exists(file_name.replace('.gz', '')) and not os.path.exists(file_name):\n",
" urllib.request.urlretrieve(url, file_name)\n",
" unzip_file(file_name)\n",
"\n",
"download_dataset()"
]
...
...
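The loops above derive each local file name from its download URL with `urlparse`. A minimal standalone sketch of that filename extraction, using one of the training URLs from the code above:

```python
from urllib.parse import urlparse

url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
# urlparse(url).path is "/exdb/mnist/train-images-idx3-ubyte.gz";
# splitting on "/" and taking the last element yields the bare file name.
file_name = urlparse(url).path.split('/')[-1]
print(file_name)  # train-images-idx3-ubyte.gz
```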
@@ -127,9 +131,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 数据集使用\n",
"\n",
"设置正确的数据存放路径,可将数据集读取出来,并对整体数据集做预处理,让数据更能发挥模型性能。MindInsight可视化的数据图,便是显示的数据集预处理时的变化方式和顺序。"
"#### 数据增强\n",
"对数据集进行数据增强操作,可以提升模型精度。\n"
]
},
{
...
...
@@ -148,32 +151,39 @@
"def create_dataset(data_path, batch_size=32, repeat_size=1,\n",
" num_parallel_workers=1):\n",
" \"\"\"\n",
" create dataset for train or test\n",
" Create dataset for train or test.\n",
"\n",
" Args:\n",
" data_path (str): The absolute path of the dataset\n",
" batch_size (int): The number of data records in each group\n",
" repeat_size (int): The number of replicated data records\n",
" num_parallel_workers (int): The number of parallel workers\n",
" \"\"\"\n",
"
\"\"\"define dataset\"\"\"
\n",
"
# define dataset
\n",
" mnist_ds = ds.MnistDataset(data_path)\n",
"\n",
" # define some parameters needed for data enhancement and rough justification\n",
" resize_height, resize_width = 32, 32\n",
" rescale = 1.0 / 255.0\n",
" shift = 0.0\n",
" rescale_nml = 1 / 0.3081\n",
" shift_nml = -1 * 0.1307 / 0.3081\n",
"\n",
" \"\"\"define map operations\"\"\"\n",
" type_cast_op = C.TypeCast(mstype.int32)\n",
" resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode\n",
" # according to the parameters, generate the corresponding data enhancement method\n",
" resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n",
" rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n",
" rescale_op = CV.Rescale(rescale, shift)\n",
" hwc2chw_op = CV.HWC2CHW()\n",
" type_cast_op = C.TypeCast(mstype.int32)\n",
"\n",
"
\"\"\"apply map operations on images\"\"\"
\n",
"
# using map method to apply operations to a dataset
\n",
" mnist_ds = mnist_ds.map(input_columns=\"label\", operations=type_cast_op, num_parallel_workers=num_parallel_workers)\n",
" mnist_ds = mnist_ds.map(input_columns=\"image\", operations=resize_op, num_parallel_workers=num_parallel_workers)\n",
" mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_op, num_parallel_workers=num_parallel_workers)\n",
" mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)\n",
" mnist_ds = mnist_ds.map(input_columns=\"image\", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)\n",
"\n",
"
\"\"\"apply DatasetOps\"\"\"
\n",
"
\n",
"
# process the generated dataset
\n",
" buffer_size = 10000\n",
" mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script\n",
" mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n",
...
...
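The two chained `Rescale` operations in `create_dataset` implement standard MNIST normalization: the first maps pixel values from [0, 255] to [0, 1], and the second standardizes with mean 0.1307 and standard deviation 0.3081. A small NumPy sketch of the arithmetic, assuming `Rescale` computes `output = input * rescale + shift` (which is how the parameters above are constructed):

```python
import numpy as np

rescale, shift = 1.0 / 255.0, 0.0      # step 1: scale pixels to [0, 1]
rescale_nml = 1 / 0.3081               # step 2: divide by the std
shift_nml = -1 * 0.1307 / 0.3081       # step 2: subtract mean/std

pixels = np.array([0.0, 128.0, 255.0])
step1 = pixels * rescale + shift       # Rescale(rescale, shift)
step2 = step1 * rescale_nml + shift_nml  # Rescale(rescale_nml, shift_nml)

# The composition equals the usual (x / 255 - mean) / std normalization.
assert np.allclose(step2, (pixels / 255.0 - 0.1307) / 0.3081)
```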
@@ -272,15 +282,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 主程序运行\n",
"\n",
"1. 首先在主函数之前调用所需要的模块,并在主函数之前使用相应接口。\n",
"#### 执行训练\n",
"\n",
"2. 本次体验主要完成计算图与数据图的可视化,定义变量`specified={'collect_graph': True,'collect_dataset_graph': True}`,在`specified`字典中,键名`collect_graph`值设置为`True`,表示记录计算图;键名`collect_dataset_graph`值设置为`True`,表示记录数据图。\n",
"1. 导入所需的代码包,并示例化训练网络。\n",
"2. 通过MindSpore提供的 `SummaryCollector` 接口,实现收集计算图和数据图。在实例化 `SummaryCollector` 时,在 `collect_specified_data` 参数中,通过设置 `collect_graph` 指定收集计算图,设置 `collect_dataset_graph` 指定收集数据图。\n",
"\n",
"3. 定义完`specified`变量后,传参到`summary_collector`中,最后将`summary_collector`传参到`model`中。\n",
"\n",
"至此,模型中就有了计算图与数据图的可视化功能。"
"更多 `SummaryCollector` 的用法,请点击[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector)查看。\n",
"\n"
]
},
{
...
...
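In isolation, the `SummaryCollector` configuration described above looks like the following sketch, which mirrors the instantiation in the training code further down (all parameter names and values are taken from that code):

```python
from mindspore.train.callback import SummaryCollector

# Collect only the computational graph and the dataset graph;
# keep_default_action=False disables the remaining default collection items.
specified = {'collect_graph': True, 'collect_dataset_graph': True}
summary_collector = SummaryCollector(summary_dir='./summary_dir',
                                     collect_specified_data=specified,
                                     collect_freq=1,
                                     keep_default_action=False)
# Pass it to training via Model.train(..., callbacks=[summary_collector]).
```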
@@ -293,9 +301,7 @@
"from mindspore import context\n",
"from mindspore.train import Model\n",
"from mindspore.nn.metrics import Accuracy\n",
"from mindspore.train.callback import SummaryCollector\n",
"from mindspore.train.serialization import load_checkpoint, load_param_into_net\n",
"from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor\n",
"from mindspore.train.callback import LossMonitor, SummaryCollector\n",
"\n",
"if __name__ == \"__main__\":\n",
" device_target = \"CPU\"\n",
...
...
@@ -308,18 +314,15 @@
" net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction=\"mean\")\n",
" net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.01, momentum=0.9)\n",
" time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())\n",
" config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)\n",
" ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\", config=config_ck)\n",
" model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n",
" specified={'collect_graph': True,'collect_dataset_graph': True}\n",
"\n",
" specified={'collect_graph': True, 'collect_dataset_graph': True}\n",
" summary_collector = SummaryCollector(summary_dir='./summary_dir', collect_specified_data=specified, collect_freq=1, keep_default_action=False)\n",
" \n",
" print(\"============== Starting Training ==============\")\n",
" model.train(epoch=2, train_dataset=ds_train, callbacks=[
time_cb, ckpoint_cb,
LossMonitor(), summary_collector], dataset_sink_mode=False)\n",
" model.train(epoch=2, train_dataset=ds_train, callbacks=[LossMonitor(), summary_collector], dataset_sink_mode=False)\n",
"\n",
" print(\"============== Starting Testing ==============\")\n",
" param_dict = load_checkpoint(\"checkpoint_lenet-3_1875.ckpt\")\n",
" load_param_into_net(network, param_dict)\n",
" ds_eval = create_dataset(\"./MNIST_Data/test/\")\n",
" acc = model.eval(ds_eval, dataset_sink_mode=False)\n",
" print(\"============== {} ==============\".format(acc))"
...
...
@@ -333,6 +336,8 @@
"- 启动MindInsigh服务命令:`mindinsigh start --summary-base-dir=/path/ --port=8080`;\n",
"- 执行完服务命令后,访问给出的地址,查看MindInsigh可视化结果。\n",
"\n",
"> 其中 /path/ 为 `SummaryCollector` 中参数 `summary_dir` 所指定的目录。\n",
"\n",
"![title](https://gitee.com/mindspore/docs/raw/master/tutorials/notebook/mindinsight/images/mindinsight_map.png)"
]
},
...
...
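The `mindinsight start` and `mindinsight stop` commands above are shell commands. If you prefer to drive them from the notebook itself, here is a sketch using `subprocess`, assuming the `mindinsight` CLI is on `PATH` and `./summary_dir` is the `summary_dir` used above:

```python
import subprocess

# Start the MindInsight service on port 8080, reading ./summary_dir.
subprocess.run(["mindinsight", "start",
                "--summary-base-dir=./summary_dir", "--port=8080"], check=True)

# ...view the graphs at the reported address, then shut the service down.
subprocess.run(["mindinsight", "stop", "--port=8080"], check=True)
```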
@@ -354,45 +359,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 数据图
信息
\n",
"### 数据图
展示
\n",
"\n",
"数据图
所展示的顺序与数据集使用处代码顺序对应
\n",
"数据图
展示了数据增强中对数据进行操作的流程。
\n",
"\n",
"1. 首先是从加载数据集
`mnist_ds = ds.MnistDataset(data_path)`开始,对应数据图中
`MnistDataset`。\n",
"1. 首先是从加载数据集
`mnist_ds = ds.MnistDataset(data_path)` 开始,对应数据图中
`MnistDataset`。\n",
"\n",
"2.
在以下所示代码中,是数据预处理的一些方法,顺序与数据图中所示顺序对应
。\n",
"2.
下面代码为上面的 `create_dataset` 函数中作数据预处理与数据增强的相关操作。可以从数据图中清晰地看到数据处理的流程。通过查看数据图,可以帮助分析是否存在不恰当的数据处理流程
。\n",
"\n",
"```\n",
"type_cast_op = C.TypeCast(mstype.int32)\n",
"resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n",
"rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n",
"rescale_op = CV.Rescale(rescale, shift)\n",
"hwc2chw_op = CV.HWC2CHW()\n",
"mnist_ds = mnist_ds.map(input_columns=\"label\", operations=type_cast_op, num_parallel_workers=num_parallel_workers)\n",
"mnist_ds = mnist_ds.map(input_columns=\"image\", operations=resize_op, num_parallel_workers=num_parallel_workers)\n",
"mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_op, num_parallel_workers=num_parallel_workers)\n",
"mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)\n",
"mnist_ds = mnist_ds.map(input_columns=\"image\", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)\n",
"```\n",
"\n",
"- `TypeCast`:在数据集`create_data`函数中,使用:`TypeCase(mstype.int32)`,将数据类型转换成我们所设置的类型。\n",
"- `Resize`:在数据集`create_data`函数中,使用:`Resize(resize_height,resize_width = 32,32)`,可以将数据的高和宽做调整。\n",
"- `Rescale`:在数据集`create_data`函数中,使用:`rescale = 1.0 / 255.0`;`Rescale(rescale,shift)`,可以重新数据格式。\n",
"- `HWC2CHW`:在数据集`create_data`函数中,使用:`HWC2CHW()`,此方法可以将数据所带信息与通道结合,一并加载。\n",
"\n",
"\n",
"3. 前面的几个步骤是数据集的预处理顺序,后面几个步骤是模型加载数据集时要定义的参数,顺序与数据图中对应。\n",
"\n",
"```\n",
"buffer_size = 10000\n",
"mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script\n",
"mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n",
"mnist_ds = mnist_ds.repeat(repeat_size)\n",
"```\n",
" \n",
"- `Shuffle`:在数据集`create_data`函数中,使用:`buffer_size = 10000`,后面数值可以支持自行设置,表示一次缓存数据的数量。\n",
"- `Batch`:在数据集`create_data`函数中,使用:`batch_size = 32`。支持自行设置,表示将整体数据集划分成小批量数据集,每一个小批次作为一个整体进行训练。\n",
"- `Repeat`:在数据集`create_data`函数中,使用:`repeat_size = 1`,支持自行设定,表示的是一次运行中要训练的次数。"
"```\n"
]
},
{
...
...
@@ -408,7 +393,7 @@
"source": [
"### 关闭MindInsight\n",
"\n",
"- 查看完成后,在命令行中可执行此命令`mindinsight stop --port=8080`,关闭MindInsight。"
"- 查看完成后,在命令行中可执行此命令
`mindinsight stop --port=8080`,关闭MindInsight。"
]
}
],
...
...
@@ -433,4 +418,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
\ No newline at end of file
tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb
(This diff is collapsed.)
tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
(This diff is collapsed.)