From c2df1eb1355796723ebaf43b3766e6dc00e8a28f Mon Sep 17 00:00:00 2001
From: Nicky Chan
Date: Wed, 11 Jul 2018 20:06:19 -0700
Subject: [PATCH] Caffe2 demo (#467)

---
 demo/README.md                   |   8 +-
 demo/caffe2/TUTORIAL_EN.md       | 144 ++++++++++++++++++++
 demo/caffe2/caffe2_mnist_demo.py | 217 +++++++++++++++++++++++++++++++
 demo/demo_en.rst                 |   3 +-
 demo/keras/keras_mnist_demo.py   |   3 +-
 5 files changed, 372 insertions(+), 3 deletions(-)
 create mode 100644 demo/caffe2/TUTORIAL_EN.md
 create mode 100644 demo/caffe2/caffe2_mnist_demo.py

diff --git a/demo/README.md b/demo/README.md
index fd0a9edb..f71d25b6 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -18,7 +18,7 @@ great model and the generated fake images are really funny.
 This demo only works with CycleGAN mode, read
 [CycleGAN train doc](https://github.com/Superjomn/pytorch-CycleGAN-and-pix2pix#cyclegan-traintest) and
 [changes to the original code](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/compare/master...Superjomn:master) for more information.
 
-## MxNet Mnist
+## MxNet MNIST
 Locates in `./mxnet_demo`.
 By adding VisualDL as callbacks to `model.fit`,
@@ -32,3 +32,9 @@ Locates in `./pytorch`.
 
 This shows how to use VisualDL in PyTorch for a CNN on `cifar10` dataset. We visualize the loss in Scalar, two
 convolutional layers in Image, the change trend of conv1 weights in Histogram and the final model graph in Graph.
+
+## Caffe2 MNIST
+Locates in `./caffe2`.
+
+This shows how to use VisualDL in Caffe2 for a LeNet model on the `mnist` dataset. We visualize the loss and accuracy in Scalar,
+and the conv parameters in Histogram.
diff --git a/demo/caffe2/TUTORIAL_EN.md b/demo/caffe2/TUTORIAL_EN.md
new file mode 100644
index 00000000..78d20105
--- /dev/null
+++ b/demo/caffe2/TUTORIAL_EN.md
@@ -0,0 +1,144 @@
+# How to use VisualDL in Caffe2
+
+Here we will show you how to use VisualDL with Caffe2 so that you can visualize the training process.
+As an example, we will train a Caffe2 convolutional neural network on the handwritten digit [MNIST](http://yann.lecun.com/exdb/mnist/) dataset.
+
+This example is a simplified version of the Caffe2 MNIST
+[tutorial](https://github.com/caffe2/tutorials/blob/master/MNIST.ipynb), with the VisualDL log writer added.
+
+The full demonstration code can be downloaded from [here](https://github.com/PaddlePaddle/VisualDL/blob/develop/demo/caffe2/caffe2_mnist_demo.py).
+
+Make sure you have a working Caffe2 environment before trying the following code. Note that VisualDL requires protobuf 3.5+ in order to run.
+
+First we initialize loggers for the different types of records as follows:
+
+```python
+from visualdl import LogWriter
+
+# create VisualDL logger and directory
+logdir = "/workspace"
+logger = LogWriter(logdir, sync_cycle=100)
+
+# create 'train' run
+with logger.mode("train"):
+    # create a scalar component called 'scalars/'
+    scalar_caffe2_mnist_train_loss = logger.scalar("scalars/scalar_caffe2_mnist_train_loss")
+    scalar_caffe2_mnist_train_accuracy = logger.scalar("scalars/scalar_caffe2_mnist_train_accuracy")
+    histogram0 = logger.histogram("histogram/histogram0", num_buckets=50)
+    histogram1 = logger.histogram("histogram/histogram1", num_buckets=50)
+
+```
+
+For our model, we will construct the LeNet model with the sigmoid activations replaced by ReLUs.
+The following shows how the input, the operators and the model are defined in Caffe2.
+
+```python
+def AddInput(model, batch_size, db, db_type):
+    data_uint8, label = model.TensorProtosDBInput(
+        [], ["data_uint8", "label"], batch_size=batch_size,
+        db=db, db_type=db_type)
+    # cast the data to float
+    data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)
+    # scale data from [0,255] down to [0,1]
+    data = model.Scale(data, data, scale=float(1. / 256))
+    # don't need the gradient for the backward pass
+    data = model.StopGradient(data, data)
+    return data, label
+
+
+def AddLeNetModel(model, data):
+    # Image size: 28 x 28 -> 24 x 24
+    conv1 = brew.conv(model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
+    # Image size: 24 x 24 -> 12 x 12
+    pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
+    # Image size: 12 x 12 -> 8 x 8
+    conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=50, kernel=5)
+    # Image size: 8 x 8 -> 4 x 4
+    pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
+    # 50 * 4 * 4 stands for dim_out from previous layer multiplied by the image size
+    # Here, the data is flattened from a tensor of dimension 50x4x4 to a vector of length 50*4*4
+    fc3 = brew.fc(model, pool2, 'fc3', dim_in=50 * 4 * 4, dim_out=500)
+    relu3 = brew.relu(model, fc3, 'relu3')
+    # Last FC Layer
+    pred = brew.fc(model, relu3, 'pred', dim_in=500, dim_out=10)
+    # Softmax Layer
+    softmax = brew.softmax(model, pred, 'softmax')
+
+    return softmax
+
+
+def AddAccuracy(model, softmax, label):
+    """Adds an accuracy op to the model"""
+    accuracy = brew.accuracy(model, [softmax, label], "accuracy")
+    return accuracy
+
+
+def AddTrainingOperators(model, softmax, label):
+    """Adds training operators to the model."""
+    # Compute cross entropy between softmax scores and labels
+    xent = model.LabelCrossEntropy([softmax, label], 'xent')
+    # Compute the expected loss
+    loss = model.AveragedLoss(xent, "loss")
+    # Track the accuracy of the model
+    AddAccuracy(model, softmax, label)
+    # Use the average loss we just computed to add gradient operators to the model
+    model.AddGradientOperators([loss])
+    # Specify the optimization algorithm
+    optimizer.build_sgd(
+        model,
+        base_learning_rate=0.1,
+        policy="step",
+        stepsize=1,
+        gamma=0.999,
+    )
+```
+
+Use the Caffe2 model helper to construct the model from the definitions above and prepare for training.
+
+```python
+arg_scope = {"order": "NCHW"}
+# Create the model helper for the train model
+train_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope)
+# Specify the input is from the train lmdb
+data, label = AddInput(
+    train_model, batch_size=64,
+    db=os.path.join(data_folder, 'mnist-train-nchw-lmdb'),
+    db_type='lmdb')
+# Add the model definition (fc layers, conv layers, softmax, etc.)
+softmax = AddLeNetModel(train_model, data)
+# Add training operators, specify loss function and optimization algorithm
+AddTrainingOperators(train_model, softmax, label)
+```
+
+Then we start training and use VisualDL to record scalar and histogram data at the same time.
+Here we record the accuracy and loss as scalars and the conv weights as histograms.
+
+
+```python
+workspace.RunNetOnce(train_model.param_init_net)
+workspace.CreateNet(train_model.net, overwrite=True)
+
+total_iters = 200
+accuracy = np.zeros(total_iters)
+loss = np.zeros(total_iters)
+
+# MAIN TRAINING LOOP!
+# Now, we will manually run the network for 200 iterations.
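+# In the loop below, each iteration runs the training net once, then pulls the
+# 'accuracy' and 'loss' blobs and the conv weight blobs out of the workspace and
+# sends them to the VisualDL components created above.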
+for i in range(total_iters):
+    workspace.RunNet(train_model.net)
+    accuracy[i] = workspace.blobs['accuracy']
+    loss[i] = workspace.blobs['loss']
+
+    scalar_caffe2_mnist_train_loss.add_record(i, loss[i])
+    scalar_caffe2_mnist_train_accuracy.add_record(i, accuracy[i])
+
+    conv1_w = workspace.FetchBlob("conv1_w")
+    conv2_w = workspace.FetchBlob("conv2_w")
+
+    histogram0.add_record(i, conv1_w[0].flatten())
+    histogram1.add_record(i, conv2_w[0].flatten())
+
+    # Check the accuracy and loss every so often
+    if i % 25 == 0:
+        print("Iter: {}, Loss: {}, Accuracy: {}".format(i, loss[i], accuracy[i]))
+```
diff --git a/demo/caffe2/caffe2_mnist_demo.py b/demo/caffe2/caffe2_mnist_demo.py
new file mode 100644
index 00000000..5ea20b86
--- /dev/null
+++ b/demo/caffe2/caffe2_mnist_demo.py
@@ -0,0 +1,217 @@
+# Copyright (c) 2017 VisualDL Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import os
+import shutil
+
+from caffe2.python import (
+    brew,
+    core,
+    model_helper,
+    optimizer,
+    workspace, )
+
+# Here we import LogWriter so that we can write log data while Caffe2 is training
+from visualdl import LogWriter
+
+# If you would like to see some really detailed initializations,
+# you can change --caffe2_log_level=0 to --caffe2_log_level=-1
+core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
+print("Necessities imported!")
+
+
+# This section preps your image and test set in an lmdb database
+def DownloadResource(url, path):
+    '''Downloads resources from s3 by url and unzips them to the provided path'''
+    import requests
+    import zipfile
+    import StringIO
+    print("Downloading... {} to {}".format(url, path))
+    r = requests.get(url, stream=True)
+    z = zipfile.ZipFile(StringIO.StringIO(r.content))
+    z.extractall(path)
+    print("Completed download and extraction.")
+
+
+# Setup the paths for the necessary directories
+current_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks')
+data_folder = os.path.join(current_folder, 'tutorial_data', 'mnist')
+root_folder = os.path.join(current_folder, 'tutorial_files', 'tutorial_mnist')
+db_missing = False
+
+# Check if the data folder already exists
+if not os.path.exists(data_folder):
+    os.makedirs(data_folder)
+    print("Your data folder was not found!! This was generated: {}".format(
+        data_folder))
+
+# Check if the training lmdb exists in the data folder
+if os.path.exists(os.path.join(data_folder, "mnist-train-nchw-lmdb")):
+    print("lmdb train db found!")
+else:
+    db_missing = True
+
+# Attempt the download of the db if either was missing
+if db_missing:
+    print("one or both of the MNIST lmdb dbs not found!!")
+    db_url = "http://download.caffe2.ai/databases/mnist-lmdb.zip"
+    try:
+        DownloadResource(db_url, data_folder)
+    except Exception as ex:
+        print(
+            "Failed to download dataset. Please download it manually from {}".
+            format(db_url))
+        print("Unzip it and place the two database folders here: {}".format(
+            data_folder))
+        raise ex
+
+# Clean up statistics from any old runs
+if os.path.exists(root_folder):
+    print(
+        "Looks like you ran this before, so we need to cleanup those old files..."
+    )
+    shutil.rmtree(root_folder)
+
+os.makedirs(root_folder)
+workspace.ResetWorkspace(root_folder)
+
+print("training data folder:" + data_folder)
+print("workspace root folder:" + root_folder)
+
+
+def AddInput(model, batch_size, db, db_type):
+    data_uint8, label = model.TensorProtosDBInput(
+        [], ["data_uint8", "label"],
+        batch_size=batch_size,
+        db=db,
+        db_type=db_type)
+    # cast the data to float
+    data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)
+    # scale data from [0,255] down to [0,1]
+    data = model.Scale(data, data, scale=float(1. / 256))
+    # don't need the gradient for the backward pass
+    data = model.StopGradient(data, data)
+    return data, label
+
+
+def AddLeNetModel(model, data):
+    # Image size: 28 x 28 -> 24 x 24
+    conv1 = brew.conv(model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
+    # Image size: 24 x 24 -> 12 x 12
+    pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
+    # Image size: 12 x 12 -> 8 x 8
+    conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=50, kernel=5)
+    # Image size: 8 x 8 -> 4 x 4
+    pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
+    # 50 * 4 * 4 stands for dim_out from previous layer multiplied by the image size
+    # Here, the data is flattened from a tensor of dimension 50x4x4 to a vector of length 50*4*4
+    fc3 = brew.fc(model, pool2, 'fc3', dim_in=50 * 4 * 4, dim_out=500)
+    relu3 = brew.relu(model, fc3, 'relu3')
+    # Last FC Layer
+    pred = brew.fc(model, relu3, 'pred', dim_in=500, dim_out=10)
+    # Softmax Layer
+    softmax = brew.softmax(model, pred, 'softmax')
+
+    return softmax
+
+
+def AddAccuracy(model, softmax, label):
+    """Adds an accuracy op to the model"""
+    accuracy = brew.accuracy(model, [softmax, label], "accuracy")
+    return accuracy
+
+
+def AddTrainingOperators(model, softmax, label):
+    """Adds training operators to the model."""
+    # Compute cross entropy between softmax scores and labels
+    xent = model.LabelCrossEntropy([softmax, label], 'xent')
+    # Compute the expected loss
+    loss = model.AveragedLoss(xent, "loss")
+    # Track the accuracy of the model
+    AddAccuracy(model, softmax, label)
+    # Use the average loss we just computed to add gradient operators to the model
+    model.AddGradientOperators([loss])
+    # Specify the optimization algorithm
+    optimizer.build_sgd(
+        model,
+        base_learning_rate=0.1,
+        policy="step",
+        stepsize=1,
+        gamma=0.999, )
+
+
+# create VisualDL logger
+logdir = "/workspace"
+logger = LogWriter(logdir, sync_cycle=100)
+
+# mark the components with 'train' label.
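+# The scalar components will chart the training loss and accuracy per iteration,
+# and the histogram components will chart how the conv weight distributions
+# change over the course of training.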
+with logger.mode("train"):
+    # create a scalar component called 'scalars/'
+    scalar_caffe2_mnist_train_loss = logger.scalar(
+        "scalars/scalar_caffe2_mnist_train_loss")
+    scalar_caffe2_mnist_train_accuracy = logger.scalar(
+        "scalars/scalar_caffe2_mnist_train_accuracy")
+    histogram0 = logger.histogram("histogram/histogram0", num_buckets=50)
+    histogram1 = logger.histogram("histogram/histogram1", num_buckets=50)
+
+# Specify the data will be input in NCHW order
+# (i.e. [batch_size, num_channels, height, width])
+arg_scope = {"order": "NCHW"}
+# Create the model helper for the train model
+train_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope)
+# Specify the input is from the train lmdb
+data, label = AddInput(
+    train_model,
+    batch_size=64,
+    db=os.path.join(data_folder, 'mnist-train-nchw-lmdb'),
+    db_type='lmdb')
+# Add the model definition (fc layers, conv layers, softmax, etc.)
+softmax = AddLeNetModel(train_model, data)
+# Add training operators, specify loss function and optimization algorithm
+AddTrainingOperators(train_model, softmax, label)
+
+workspace.RunNetOnce(train_model.param_init_net)
+workspace.CreateNet(train_model.net, overwrite=True)
+
+total_iters = 200
+accuracy = np.zeros(total_iters)
+loss = np.zeros(total_iters)
+
+# MAIN TRAINING LOOP!
+# Now, we will manually run the network for 200 iterations.
+for i in range(total_iters):
+    workspace.RunNet(train_model.net)
+    accuracy[i] = workspace.blobs['accuracy']
+    loss[i] = workspace.blobs['loss']
+
+    scalar_caffe2_mnist_train_loss.add_record(i, loss[i])
+    scalar_caffe2_mnist_train_accuracy.add_record(i, accuracy[i])
+
+    conv1_w = workspace.FetchBlob("conv1_w")
+    conv2_w = workspace.FetchBlob("conv2_w")
+
+    histogram0.add_record(i, conv1_w[0].flatten())
+    histogram1.add_record(i, conv2_w[0].flatten())
+
+    # Check the accuracy and loss every so often
+    if i % 25 == 0:
+        print(
+            "Iter: {}, Loss: {}, Accuracy: {}".format(i, loss[i], accuracy[i]))
diff --git a/demo/demo_en.rst b/demo/demo_en.rst
index 963e9af6..26e7d576 100644
--- a/demo/demo_en.rst
+++ b/demo/demo_en.rst
@@ -10,4 +10,5 @@ here are some examples for different platforms.
    paddle/TUTORIAL_EN.md
    keras/TUTORIAL_EN.md
    mxnet/TUTORIAL_EN.md
-   pytorch/TUTORIAL_EN.md
\ No newline at end of file
+   pytorch/TUTORIAL_EN.md
+   caffe2/TUTORIAL_EN.md
\ No newline at end of file
diff --git a/demo/keras/keras_mnist_demo.py b/demo/keras/keras_mnist_demo.py
index 1397adfe..b88e5b32 100644
--- a/demo/keras/keras_mnist_demo.py
+++ b/demo/keras/keras_mnist_demo.py
@@ -77,7 +77,8 @@ logger = LogWriter(logdir, sync_cycle=100)
 # mark the components with 'train' label.
 with logger.mode("train"):
     # create a scalar component called 'scalars/'
-    scalar_keras_train_loss = logger.scalar("scalars/scalar_keras_train_loss")
+    scalar_keras_train_loss = logger.scalar(
+        "scalars/scalar_keras_mnist_train_loss")
     image_input = logger.image("images/input", 1)
     image0 = logger.image("images/image0", 1)
     image1 = logger.image("images/image1", 1)
-- 
GitLab
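
Both the tutorial and the demo script in this patch write their VisualDL records to the `/workspace` log directory. To view the Scalar and Histogram pages, serve that directory with the VisualDL board. A minimal sketch of the launch command, assuming the VisualDL command-line launcher is on your PATH (some 1.x releases spell the executable `visualDL` rather than `visualdl`):

```bash
# Serve the records written by the demo, then open http://localhost:8080 in a browser.
visualdl --logdir /workspace --port 8080
```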