From 2ff30dd9e04f6947d93145bf88086601a2a11803 Mon Sep 17 00:00:00 2001
From: daminglu
Date: Wed, 31 Jan 2018 12:52:42 +0800
Subject: [PATCH] Demo of Using VisualDL in PyTorch (#230)

---
 demo/pytorch/TUTORIAL_CN.md        | 211 ++++++++++++++++++++++
 demo/pytorch/pytorch_cifar10.ipynb | 271 +++++++++++++++++++++++++++++
 demo/pytorch/pytorch_cifar10.py    | 148 ++++++++++++++++
 3 files changed, 630 insertions(+)
 create mode 100644 demo/pytorch/TUTORIAL_CN.md
 create mode 100644 demo/pytorch/pytorch_cifar10.ipynb
 create mode 100644 demo/pytorch/pytorch_cifar10.py

diff --git a/demo/pytorch/TUTORIAL_CN.md b/demo/pytorch/TUTORIAL_CN.md
new file mode 100644
index 00000000..ea1559db
--- /dev/null
+++ b/demo/pytorch/TUTORIAL_CN.md
@@ -0,0 +1,211 @@

# How to use VisualDL in PyTorch

Below we demonstrate how to use VisualDL in PyTorch so that the training process and the final model can be visualized. As our example, we use PyTorch to train a convolutional neural network (CNN) on the
[Cifar10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset.

The main body of the program comes from the PyTorch [Tutorial](http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html).
We also provide an interactive Jupyter Notebook version; see pytorch_cifar10.ipynb in this folder.

```python
import torch
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib
matplotlib.use('Agg')

from visualdl import LogWriter


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=500,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=500,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


import matplotlib.pyplot as plt
import numpy as np


# function to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    fig, ax = plt.subplots()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    # we can either show the image or save it locally
    # plt.show()
    fig.savefig('out' + str(np.random.randint(0, 10000)) + '.pdf')
```

Here is a preview of the Cifar10 images we are about to work with:
*(figure: a grid of sample Cifar10 training images)*
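Before wiring up the loggers, it can be useful to confirm what the data pipeline actually yields. The following quick check is a small sketch (not part of the original demo script) that reuses the `trainset`, `testset` and `trainloader` objects defined above:

```python
# Optional sanity check of the data pipeline defined above.
print(len(trainset), len(testset))        # expected: 50000 10000

images, labels = next(iter(trainloader))  # fetch one mini-batch from the DataLoader
print(images.size())                      # expected: torch.Size([500, 3, 32, 32])
print(labels.size())                      # expected: torch.Size([500])
```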
Then we create the VisualDL data-collection loggers:

```python
logdir = "/workspace"
logger = LogWriter(logdir, sync_cycle=100)

# mark the components with 'train' label.
with logger.mode("train"):
    # create a scalar component called 'scalars/'
    scalar_pytorch_train_loss = logger.scalar("scalars/scalar_pytorch_train_loss")
    image1 = logger.image("images/image1", 1)
    image2 = logger.image("images/image2", 1)
    histogram0 = logger.histogram("histogram/histogram0", num_buckets=100)
```

Cifar10 contains 50,000 training images and 10,000 test images. We group every 500 images into one training batch, and the image-sampling batch is also 500. Each training batch therefore has the dimensions:

500 x 3 x 32 x 32

Next we create the CNN model:

```python
# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

# Define a Convolutional Neural Network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

# Define a loss function and an optimizer (as in pytorch_cifar10.py)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

train_step = 0  # global step counter used for the VisualDL records
```

(The first fully connected layer takes 16 * 5 * 5 inputs because two 5x5 convolutions and two 2x2 max-pools reduce a 32x32 image to 16 feature maps of size 5x5.)

Next we train the network and use VisualDL to collect the relevant data at the same time:

```python
# Train the network
for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # use VisualDL to retrieve metrics
        # scalar
        scalar_pytorch_train_loss.add_record(train_step, float(loss))

        # histogram
        weight_list = net.conv1.weight.view(6 * 3 * 5 * 5, -1)
        histogram0.add_record(train_step, weight_list)

        # image
        image1.start_sampling()
        image1.add_sample([96, 25], net.conv2.weight.view(16 * 6 * 5 * 5, -1))
        image1.finish_sampling()

        image2.start_sampling()
        image2.add_sample([18, 25], net.conv1.weight.view(6 * 3 * 5 * 5, -1))
        image2.finish_sampling()

        train_step += 1

        # print statistics
        running_loss += loss.data[0]
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
```

Finally, because PyTorch uses dynamic computation graphs, we run the model once on a dummy input so that a graph can be produced, and export it in ONNX format:

```python
import torch.onnx
dummy_input = Variable(torch.randn(4, 3, 32, 32))
torch.onnx.export(net, dummy_input, "pytorch_cifar10.onnx")

print('Done')
```

After training finishes, the visualization results of the individual components are as follows.

The scalar plot of the training loss:
*(figure: training loss curve recorded by the scalar component)*
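The loss curve shows that training is progressing, but it does not tell us how well the network generalizes. A quick way to check is to run the trained model over the held-out test set, following the evaluation loop from the PyTorch tutorial referenced above (a sketch, not part of the original demo; `testloader` was defined at the top):

```python
# Evaluate the trained network on the 10,000 Cifar10 test images.
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images))
    # the class with the highest score is the prediction
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 10000 test images: %d %%' %
      (100 * correct / total))
```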
The images of the first and second convolution layers' weights after training are shown below:
*(figures: conv1 and conv2 weight visualizations from the image components)*
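The shapes passed to `add_sample` in the training loop, `[96, 25]` and `[18, 25]`, come directly from flattening the convolution kernels: `conv2.weight` has shape (16, 6, 5, 5), i.e. 16 * 6 = 96 kernels of 5 * 5 = 25 values each, and `conv1.weight` has shape (6, 3, 5, 5), i.e. 6 * 3 = 18 kernels of 25 values. A small sketch to verify this (assuming the trained `net` from above):

```python
# conv2: 16 output channels x 6 input channels x 5x5 kernels -> 96 kernels of 25 weights
print(net.conv2.weight.size())                            # torch.Size([16, 6, 5, 5])
print(net.conv2.weight.view(16 * 6 * 5 * 5, -1).size())   # torch.Size([2400, 1]) = 96 * 25 values

# conv1: 6 output channels x 3 input channels x 5x5 kernels -> 18 kernels of 25 weights
print(net.conv1.weight.size())                            # torch.Size([6, 3, 5, 5])
print(net.conv1.weight.view(6 * 3 * 5 * 5, -1).size())    # torch.Size([450, 1]) = 18 * 25 values
```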
The histogram of the training parameters is shown below:
*(figure: histogram of the conv1 weights over training steps)*
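For reference, the histogram component records every weight of `conv1` (6 * 3 * 5 * 5 = 450 values) at each training step and buckets them into `num_buckets=100` bins. To peek at the raw numbers behind one snapshot (a sketch, assuming the trained `net` is available):

```python
# The same flattened view of conv1's weights that is fed to histogram0 during training.
conv1_weights = net.conv1.weight.view(6 * 3 * 5 * 5, -1)
print(conv1_weights.size())                                # torch.Size([450, 1])
print(conv1_weights.data.min(), conv1_weights.data.max())  # value range covered by the histogram
```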
The model graph looks like this:
*(figure: the model graph rendered by VisualDL from the exported ONNX file)*
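The graph above is rendered from the `pytorch_cifar10.onnx` file exported earlier. Independently of VisualDL, the exported model can be loaded and validated with the `onnx` Python package (a sketch, assuming `onnx` is installed):

```python
import onnx

# Load the exported model and run ONNX's structural checks on it.
model = onnx.load("pytorch_cifar10.onnx")
onnx.checker.check_model(model)

# Print a human-readable description of the graph (nodes, inputs, outputs).
print(onnx.helper.printable_graph(model.graph))
```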
+ + +生成的完整效果图可以在[这里](https://github.com/daming-lu/large_files/blob/master/pytorch_demo_figs/graph.png?raw=true)下载。 diff --git a/demo/pytorch/pytorch_cifar10.ipynb b/demo/pytorch/pytorch_cifar10.ipynb new file mode 100644 index 00000000..12703c21 --- /dev/null +++ b/demo/pytorch/pytorch_cifar10.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "如何在PyTorch中使用VisualDL\n", + "=====================\n", + "\n", + "下面我们演示一下如何在PyTorch中使用VisualDL,从而可以把PyTorch的训练过程以及最后的模型可视化出来。我们将以PyTorch用卷积神经网络(CNN, Convolutional Neural Network)来训练 [Cifar10](https://www.cs.toronto.edu/~kriz/cifar.html) 数据集作为例子。\n", + "\n", + "程序的主体来自PyTorch的 [Tutorial](http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "\n", + "import matplotlib\n", + "matplotlib.use('Agg')\n", + "\n", + "from visualdl import LogWriter\n", + "\n", + "\n", + "transform = transforms.Compose(\n", + " [transforms.ToTensor(),\n", + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", + "\n", + "trainset = torchvision.datasets.CIFAR10(root='./data', train=True,\n", + " download=True, transform=transform)\n", + "trainloader = torch.utils.data.DataLoader(trainset, batch_size=500,\n", + " shuffle=True, num_workers=2)\n", + "\n", + "testset = torchvision.datasets.CIFAR10(root='./data', train=False,\n", + " download=True, transform=transform)\n", + "testloader = torch.utils.data.DataLoader(testset, batch_size=500,\n", + " shuffle=False, num_workers=2)\n", + "\n", + "classes = ('plane', 'car', 'bird', 'cat',\n", + " 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n", + "\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "\n", + "# functions to show an image\n", + "def imshow(img):\n", + " img = img / 2 + 0.5 # unnormalize\n", + " npimg = img.numpy()\n", + " fig, ax = plt.subplots()\n", + " plt.imshow(np.transpose(npimg, (1, 2, 0)))\n", + " # we can either show the image or save it locally\n", + " # plt.show()\n", + " fig.savefig('out' + str(np.random.randint(0, 10000)) + '.pdf')\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "然后我们开始创建 VisualDL 的数据采集 loggers\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "logdir = \"/workspace\"\n", + "logger = LogWriter(logdir, sync_cycle=100)\n", + "\n", + "# mark the components with 'train' label.\n", + "with logger.mode(\"train\"):\n", + " # create a scalar component called 'scalars/'\n", + " scalar_pytorch_train_loss = logger.scalar(\"scalars/scalar_pytorch_train_loss\")\n", + " image1 = logger.image(\"images/image1\", 1)\n", + " image2 = logger.image(\"images/image2\", 1)\n", + " histogram0 = logger.histogram(\"histogram/histogram0\", num_buckets=100)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cifar10 中有 50000 个训练图像和 10000 个测试图像。我们每 500 个作为一个训练集,图片采样也选 500 。 每个训练集 (batch) 是如下的维度:\n", + "\n", + "500 x 3 x 
32 x 32\n", + "\n", + "接下来我们开始创建 CNN 模型\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# get some random training images\n", + "dataiter = iter(trainloader)\n", + "images, labels = dataiter.next()\n", + "\n", + "# show images\n", + "imshow(torchvision.utils.make_grid(images))\n", + "# print labels\n", + "print(' '.join('%5s' % classes[labels[j]] for j in range(4)))\n", + "\n", + "# Define a Convolution Neural Network\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(3, 6, 5)\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.conv2 = nn.Conv2d(6, 16, 5)\n", + " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x = self.pool(F.relu(self.conv2(x)))\n", + " x = x.view(-1, 16 * 5 * 5)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "\n", + "net = Net()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "接下来我们开始训练并且同时用 VisualDL 来采集相关数据\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Train the network\n", + "for epoch in range(5): # loop over the dataset multiple times\n", + " running_loss = 0.0\n", + " for i, data in enumerate(trainloader, 0):\n", + " # get the inputs\n", + " inputs, labels = data\n", + "\n", + " # wrap them in Variable\n", + " inputs, labels = Variable(inputs), Variable(labels)\n", + "\n", + " # zero the parameter gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # forward + backward + optimize\n", + " outputs = net(inputs)\n", + " loss = criterion(outputs, labels)\n", + "\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " # use VisualDL to retrieve metrics\n", + " # scalar\n", + " scalar_pytorch_train_loss.add_record(train_step, float(loss))\n", + "\n", + " # histogram\n", + " weight_list = net.conv1.weight.view(6*3*5*5, -1)\n", + " histogram0.add_record(train_step, weight_list)\n", + "\n", + " # image\n", + " image1.start_sampling()\n", + " image1.add_sample([96, 25], net.conv2.weight.view(16*6*5*5, -1))\n", + " image1.finish_sampling()\n", + "\n", + " image2.start_sampling()\n", + " image2.add_sample([18, 25], net.conv1.weight.view(6*3*5*5, -1))\n", + " image2.finish_sampling()\n", + "\n", + "\n", + " train_step += 1\n", + "\n", + " # print statistics\n", + " running_loss += loss.data[0]\n", + " if i % 2000 == 1999: # print every 2000 mini-batches\n", + " print('[%d, %5d] loss: %.3f' %\n", + " (epoch + 1, i + 1, running_loss / 2000))\n", + " running_loss = 0.0\n", + "\n", + "print('Finished Training')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "最后,因为 PyTorch 采用 Dynamic Computation Graphs,我们用一个 dummy 输入来空跑一下模型,以便产生图" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch.onnx\n", + "dummy_input = Variable(torch.randn(4, 3, 32, 32))\n", + "torch.onnx.export(net, dummy_input, \"pytorch_cifar10.onnx\")\n", + "\n", + "print('Done')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.14" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/demo/pytorch/pytorch_cifar10.py b/demo/pytorch/pytorch_cifar10.py new file mode 100644 index 00000000..e68f1aed --- /dev/null +++ b/demo/pytorch/pytorch_cifar10.py @@ -0,0 +1,148 @@ +import torch +import torchvision +import torchvision.transforms as transforms +from torch.autograd import Variable +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.onnx +import matplotlib +from visualdl import LogWriter +import matplotlib.pyplot as plt +import numpy as np + +matplotlib.use('Agg') + +transform = transforms.Compose([ + transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), + (0.5, 0.5, 0.5)) +]) + +trainset = torchvision.datasets.CIFAR10( + root='./data', train=True, download=True, transform=transform) +trainloader = torch.utils.data.DataLoader( + trainset, batch_size=500, shuffle=True, num_workers=2) + +testset = torchvision.datasets.CIFAR10( + root='./data', train=False, download=True, transform=transform) +testloader = torch.utils.data.DataLoader( + testset, batch_size=500, shuffle=False, num_workers=2) + +classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', + 'ship', 'truck') + + +# functions to show an image +def imshow(img): + img = img / 2 + 0.5 # unnormalize + npimg = img.numpy() + fig, ax = plt.subplots() + plt.imshow(np.transpose(npimg, (1, 2, 0))) + # we can either show the image or save it locally + # plt.show() + fig.savefig('out' + str(np.random.randint(0, 10000)) + '.pdf') + + +logdir = "/workspace" +logger = LogWriter(logdir, sync_cycle=100) + +# mark the components with 'train' label. 
+with logger.mode("train"): + # create a scalar component called 'scalars/' + scalar_pytorch_train_loss = logger.scalar( + "scalars/scalar_pytorch_train_loss") + image1 = logger.image("images/image1", 1) + image2 = logger.image("images/image2", 1) + histogram0 = logger.histogram("histogram/histogram0", num_buckets=100) + +# get some random training images +dataiter = iter(trainloader) +images, labels = dataiter.next() + +# show images +imshow(torchvision.utils.make_grid(images)) +# print labels +print(' '.join('%5s' % classes[labels[j]] for j in range(4))) + + +# Define a Convolution Neural Network +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + +net = Net() + +# Define a Loss function and optimizer +criterion = nn.CrossEntropyLoss() +optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9) + +train_step = 0 + +# Train the network +for epoch in range(5): # loop over the dataset multiple times + running_loss = 0.0 + for i, data in enumerate(trainloader, 0): + # get the inputs + inputs, labels = data + + # wrap them in Variable + inputs, labels = Variable(inputs), Variable(labels) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + backward + optimize + outputs = net(inputs) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + # use VisualDL to retrieve metrics + # scalar + scalar_pytorch_train_loss.add_record(train_step, float(loss)) + + # histogram + weight_list = net.conv1.weight.view(6 * 3 * 5 * 5, -1) + histogram0.add_record(train_step, weight_list) + + # image + image1.start_sampling() + image1.add_sample([96, 25], net.conv2.weight.view(16 * 6 * 5 * 5, -1)) + image1.finish_sampling() + + image2.start_sampling() + image2.add_sample([18, 25], net.conv1.weight.view(6 * 3 * 5 * 5, -1)) + image2.finish_sampling() + + train_step += 1 + + # print statistics + running_loss += loss.data[0] + if i % 2000 == 1999: # print every 2000 mini-batches + print('[%d, %5d] loss: %.3f' % + (epoch + 1, i + 1, running_loss / 2000)) + running_loss = 0.0 + +print('Finished Training') + +dummy_input = Variable(torch.randn(4, 3, 32, 32)) +torch.onnx.export(net, dummy_input, "pytorch_cifar10.onnx") + +print('Done') -- GitLab