Quick cheatsheet
Contents:
1. Lasagne MNIST example[1][2]
$ git clone https://github.com/Lasagne/Lasagne.git
$ cd Lasagne/examples
$ python mnist.py
Link:
mnist.py
#!/usr/bin/env python
#filename: mnist.py
"""
Usage example employing Lasagne for digit recognition using the MNIST dataset.
This example is deliberately structured as a long flat file, focusing on how
to use Lasagne, instead of focusing on writing maximally modular and reusable
code. It is used as the foundation for the introductory Lasagne tutorial:
http://lasagne.readthedocs.org/en/latest/user/tutorial.html
More in-depth examples and reproductions of paper results are maintained in
a separate repository: https://github.com/Lasagne/Recipes
"""
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
# ################## Download and prepare the MNIST dataset ##################
# This is just some way of getting the MNIST dataset from an online location
# and loading it into numpy arrays. It doesn't involve Lasagne at all.
def load_dataset():
    """Download (if necessary) and load the MNIST dataset as numpy arrays.

    The four gzip-compressed files are looked up in the current working
    directory and downloaded when they are missing.

    Returns:
        tuple: ``(X_train, y_train, X_val, y_val, X_test, y_test)``. Image
        arrays are float32 with shape ``(examples, 1, 28, 28)`` and values in
        ``[0, 255/256]``; label arrays are uint8 vectors. The last 10000
        training examples are split off as the validation set.
    """
    # We first define a download function, supporting both Python 2 and 3.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve

    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
        print("Downloading %s" % filename)
        # Fetch under a temporary name and rename once complete, so that an
        # interrupted transfer is never mistaken for a finished file by the
        # `os.path.exists` checks below on the next run.
        partial = filename + '.part'
        urlretrieve(source + filename, partial)
        os.rename(partial, filename)

    # We then define functions for loading MNIST images and labels.
    # For convenience, they also download the requested files if needed.
    import gzip

    def read_payload(filename, header_size):
        # Return the file content after the header as a flat uint8 vector
        # (Yann LeCun's binary format: fixed-size header, then raw bytes).
        if not os.path.exists(filename):
            download(filename)
        with gzip.open(filename, 'rb') as f:
            return np.frombuffer(f.read(), np.uint8, offset=header_size)

    def load_mnist_images(filename):
        data = read_payload(filename, 16)
        # The inputs are vectors now, we reshape them to monochrome 2D images,
        # following the shape convention: (examples, channels, rows, columns)
        data = data.reshape(-1, 1, 28, 28)
        # The inputs come as bytes, we convert them to float32 in range [0,1].
        # (Actually to range [0, 255/256], for compatibility to the version
        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
        return data / np.float32(256)

    def load_mnist_labels(filename):
        # The labels are vectors of integers, that's exactly what we want.
        return read_payload(filename, 8)

    # We can now download and read the training and test set images and labels.
    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    # We reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    # We just return all the arrays in order, as expected in main().
    # (It doesn't matter how we do this as long as we can read them again.)
    return X_train, y_train, X_val, y_val, X_test, y_test
# ##################### Build the neural network model #######################
# This script supports three types of models. For each one, we define a
# function that takes a Theano variable representing the input and returns
# the output layer of a neural network model built in Lasagne.
def build_mlp(input_var=None):
    """Build a fixed MLP: two 800-unit ReLU hidden layers and a softmax output.

    Dropout of 20% is applied to the input and 50% after each hidden layer.

    Args:
        input_var: Theano variable to link to the input layer, or None.

    Returns:
        The 10-unit softmax output layer; the rest of the network is
        reachable through it.
    """
    # Input layer, specifying the expected input shape of the network
    # (unspecified batchsize, 1 channel, 28 rows and 28 columns) and
    # linking it to the given Theano variable `input_var`, if any:
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                     input_var=input_var)

    # Apply 20% dropout to the input data:
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)

    # Add a fully-connected layer of 800 units, using the linear rectifier, and
    # initializing weights with Glorot's scheme (which is the default anyway):
    l_hid1 = lasagne.layers.DenseLayer(
        l_in_drop, num_units=800,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())

    # We'll now add dropout of 50%:
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

    # Another 800-unit layer:
    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1_drop, num_units=800,
        nonlinearity=lasagne.nonlinearities.rectify)

    # 50% dropout again:
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)

    # Finally, we'll add the fully-connected output layer, of 10 softmax units:
    l_out = lasagne.layers.DenseLayer(
        l_hid2_drop, num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

    # Each layer is linked to its incoming layer(s), so we only need to pass
    # the output layer to give access to a network in Lasagne:
    return l_out
def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5):
    """Build an MLP with a configurable number and size of hidden layers.

    With the default arguments this creates the same network as `build_mlp`.
    All layers are simply bound to the name `network`, since only the last
    one created is returned.

    Args:
        input_var: Theano variable to link to the input layer, or None.
        depth: number of hidden layers.
        width: number of units in each hidden layer.
        drop_input: dropout probability applied to the input; a falsy value
            (0 or None) omits the dropout layer entirely.
        drop_hidden: dropout probability applied after every hidden layer;
            a falsy value omits those dropout layers.

    Returns:
        The 10-unit softmax output layer.
    """
    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)

    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
            network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)

    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
    return network
def build_cnn(input_var=None):
    """Build a CNN: two convolution + pooling stages and a dense hidden layer.

    Args:
        input_var: Theano variable to link to the input layer, or None.

    Returns:
        The 10-unit softmax output layer.
    """
    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

    return network
# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
# Notice that this function returns only mini-batches of size `batchsize`.
# If the size of the data is not a multiple of `batchsize`, it will not
# return the last (remaining) mini-batch.
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches of exactly `batchsize` items.

    Only full batches are produced: if the number of examples is not a
    multiple of `batchsize`, the trailing remainder is not returned.

    Args:
        inputs: indexable array of examples.
        targets: indexable array with the same length as `inputs`.
        batchsize: positive number of examples per batch.
        shuffle: if true, batches are drawn in a random order (using
            ``np.random.shuffle`` on an index array); otherwise batches are
            consecutive slices.

    Raises:
        ValueError: if `batchsize` is not positive. (Raised when iteration
            starts, since this is a generator.)
    """
    assert len(inputs) == len(targets)
    # A zero step makes range() fail with an unrelated message and a negative
    # one silently produces no batches at all; reject both explicitly.
    if batchsize <= 0:
        raise ValueError("batchsize must be positive, got %r" % (batchsize,))
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
# ############################## Main program ################################
# Everything else will be handled in our main program now. We could pull out
# more functions to better separate the code, but it wouldn't make it any
# easier to read.
def main(model='mlp', num_epochs=500):
    """Train a network on MNIST and print per-epoch and final test results.

    Args:
        model: ``'mlp'``, ``'cnn'`` or
            ``'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'``. Any other value
            prints a message and returns without training.
        num_epochs: number of full passes over the training data.
    """
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy.
    # (Named `*_expr` so the symbolic expression is not confused with the
    # numeric test accuracy computed after training.)
    test_acc_expr = T.mean(
        T.eq(T.argmax(test_prediction, axis=1), target_var),
        dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_acc_expr])

    # Mini-batch size shared by training, validation and testing.
    batchsize = 500

    def evaluate(X, y):
        # One unshuffled pass of `val_fn` over (X, y); returns the summed
        # loss, the summed accuracy and the number of batches processed.
        err_sum = 0
        acc_sum = 0
        batches = 0
        for inputs, targets in iterate_minibatches(X, y, batchsize,
                                                   shuffle=False):
            err, acc = val_fn(inputs, targets)
            err_sum += err
            acc_sum += acc
            batches += 1
        return err_sum, acc_sum, batches

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train, y_train,
                                                   batchsize, shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err, val_acc, val_batches = evaluate(X_val, y_val)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err, test_acc, test_batches = evaluate(X_test, y_test)
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
# Command-line entry point: print usage for -h/--help, otherwise forward the
# optional MODEL and EPOCHS positional arguments to main().
if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on MNIST using Lasagne.")
        print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        print()
        print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        print(" 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        print(" with DEPTH hidden layers of WIDTH units, DROP_IN")
        print(" input dropout and DROP_HID hidden dropout,")
        print(" 'cnn' for a simple Convolutional Neural Network (CNN).")
        print("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        # Only pass what was given, so main()'s own defaults apply otherwise.
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
Result
Using GPU (about 7 minutes)
Using gpu device 0: Quadro K2200 (CNMeM is disabled, CuDNN 4007)
Loading data...
Building model and compiling functions...
Starting training...
Epoch 1 of 500 took 0.837s
training loss: 1.196399
validation loss: 0.402135
validation accuracy: 88.91 %
(etc.)
Epoch 499 of 500 took 0.825s
training loss: 0.017662
validation loss: 0.053915
validation accuracy: 98.63 %
Epoch 500 of 500 took 0.827s
training loss: 0.018308
validation loss: 0.053412
validation accuracy: 98.64 %
Final results:
test loss: 0.046705
test accuracy: 98.64 %
Using CPU (about 85 minutes)
Loading data...
Building model and compiling functions...
Starting training...
Epoch 1 of 500 took 10.398s
training loss: 1.220270
validation loss: 0.409670
validation accuracy: 88.30 %
(etc.)
Epoch 499 of 500 took 10.327s
training loss: 0.018568
validation loss: 0.054652
validation accuracy: 98.75 %
Epoch 500 of 500 took 10.262s
training loss: 0.018887
validation loss: 0.055043
validation accuracy: 98.67 %
Final results:
test loss: 0.048217
test accuracy: 98.71 %
2. Nolearn MNIST example[3]
Link:
simple_mnist.py
#!/usr/bin/env python
#filename: simple_mnist.py
###### Imports ######
import os
import sys
import gzip

# Populating the interactive namespace from numpy and matplotlib
import matplotlib.pyplot as plt
import numpy as np

if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

from lasagne.layers import DenseLayer
from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.layers import Conv2DLayer
from lasagne.layers import MaxPool2DLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import adam
from lasagne.layers import get_all_params
from nolearn.lasagne import NeuralNet
from nolearn.lasagne import PrintLayerInfo
from nolearn.lasagne import TrainSplit
from nolearn.lasagne import objective
###### Loading MNIST data ######
def get_data(filename, source='http://yann.lecun.com/exdb/mnist/', np_offset=16,
             folder='data'):
    """Return the payload of a gzip-compressed MNIST file as a uint8 vector.

    The file is read from ``folder/filename``; when it does not exist there
    it is first downloaded from ``source + filename``.

    Args:
        filename: name of the gzip file, e.g. 'train-images-idx3-ubyte.gz'.
        source: base URL the file is downloaded from when missing locally.
        np_offset: number of leading header bytes to skip after
            decompression (callers in this script use 16 for image files and
            8 for label files).
        folder: local directory used to store the downloaded files; it is
            created if it does not exist.

    Returns:
        Flat numpy uint8 array with the bytes following the header.
    """
    filepath = os.path.join(folder, filename)
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
    if not os.path.exists(filepath):
        print("Downloading %s" % filename)
        # Fetch under a temporary name and rename once complete, so that an
        # interrupted transfer is not mistaken for a finished file next time.
        partial = filepath + '.part'
        urlretrieve(source + filename, partial)
        os.rename(partial, filepath)
    with gzip.open(filepath, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=np_offset)
    return data
def load_mnist(path_x, path_y):
    """Load one MNIST image/label pair and standardise the images.

    Args:
        path_x: file name of the gzip-compressed image file.
        path_y: file name of the gzip-compressed label file.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float32 array of shape
        ``(samples, 1, 28, 28)`` and ``y`` is an int32 label vector.
    """
    # For convolutional layers, the default shape of data is bc01,
    # i.e. batch size x color channels x image dimension 1 x image dimension 2.
    # Therefore the images are reshaped to (-1, 1, 28, 28): the number of
    # samples is inferred, there is 1 colour channel, and each image is
    # 28 (vertical) x 28 (horizontal) pixels.
    X = get_data(path_x, np_offset=16).reshape(-1, 1, 28, 28).astype(np.float32)
    y = get_data(path_y, np_offset=8).astype(np.int32)

    ### Normalization (feature scaling) ###
    # NOTE: the mean and standard deviation are taken from the array being
    # loaded, so separately loaded sets are each scaled by their own
    # statistics.
    X = (X - X.mean()) / X.std()
    return X, y
##### Download mnist files #####
path_x_train = 'train-images-idx3-ubyte.gz'
path_y_train = 'train-labels-idx1-ubyte.gz'
path_x_test = 't10k-images-idx3-ubyte.gz'
path_y_test = 't10k-labels-idx1-ubyte.gz'
X_train, y_train = load_mnist(path_x_train, path_y_train)
X_test, y_test = load_mnist(path_x_test, path_y_test)

##### Plot the handwritten digit images with labels #####
# Shows the first 16 training images in a 4x4 grid. The pixel values are
# negated before plotting, which inverts the grayscale rendering.
# NOTE: outside an interactive/inline matplotlib session nothing is displayed
# unless the figure is explicitly shown or saved.
figs, axes = plt.subplots(4, 4, figsize=(6, 6))
for i in range(4):
    for j in range(4):
        axes[i, j].imshow(-X_train[i + 4 * j].reshape(28, 28), cmap='gray', interpolation='none')
        axes[i, j].set_xticks([])
        axes[i, j].set_yticks([])
        axes[i, j].set_title("Label: {}".format(y_train[i + 4 * j]))
        axes[i, j].axis('off')

##### Define the network #####
# Layers are given as (layer class, keyword arguments) pairs; the input shape
# is taken from the training images (any batch size, then channels, rows,
# columns).
layers1 = [
    (InputLayer, {'shape': (None, X_train.shape[1], X_train.shape[2], X_train.shape[3])}),
    (Conv2DLayer, {'num_filters': 32, 'filter_size': (3, 3)}),
    (MaxPool2DLayer, {'pool_size': (2, 2)}),
    (Conv2DLayer, {'num_filters': 64, 'filter_size': (3, 3)}),
    (Conv2DLayer, {'num_filters': 64, 'filter_size': (3, 3)}),
    (MaxPool2DLayer, {'pool_size': (2, 2)}),
    (Conv2DLayer, {'num_filters': 96, 'filter_size': (3, 3)}),
    (MaxPool2DLayer, {'pool_size': (2, 2)}),
    (DenseLayer, {'num_units': 64}),
    (DropoutLayer, {}),
    (DenseLayer, {'num_units': 64}),
    (DenseLayer, {'num_units': 10, 'nonlinearity': softmax}),
]
net1 = NeuralNet(
    layers=layers1,
    max_epochs=10,
    update_learning_rate=0.01,
    verbose=2,
)

##### Show more information #####
net1.initialize()
layer_info = PrintLayerInfo()
layer_info(net1)

##### train the net #####
net1.fit(X_train, y_train)

##### test #####
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Start to test.....")
y_pred = net1.predict(X_test)
print("The accuracy of this network is: %0.2f" % (y_pred == y_test).mean())
Result
# Neural Network with 122154 learnable parameters
## Layer information
name size total cap.Y cap.X cov.Y cov.X
---------- -------- ------- ------- ------- ------- -------
input0 1x28x28 784 100.00 100.00 100.00 100.00
conv2d1 32x26x26 21632 100.00 100.00 10.71 10.71
maxpool2d2 32x13x13 5408 100.00 100.00 10.71 10.71
conv2d3 64x11x11 7744 85.71 85.71 25.00 25.00
conv2d4 64x9x9 5184 54.55 54.55 39.29 39.29
maxpool2d5 64x4x4 1024 54.55 54.55 39.29 39.29
conv2d6 96x2x2 384 63.16 63.16 67.86 67.86
maxpool2d7 96x1x1 96 63.16 63.16 67.86 67.86
dense8 64 64 100.00 100.00 100.00 100.00
dropout9 64 64 100.00 100.00 100.00 100.00
dense10 64 64 100.00 100.00 100.00 100.00
dense11 10 10 100.00 100.00 100.00 100.00
Explanation
X, Y: image dimensions
cap.: learning capacity
cov.: coverage of image
magenta: capacity too low (<1/6)
cyan: image coverage too high (>100%)
red: capacity too low and coverage too high
epoch trn loss val loss trn/val valid acc dur
------- ---------- ---------- --------- ----------- -----
1 0.68008 0.11388 5.97186 0.96635 6.25s
2 0.16667 0.08609 1.93598 0.97593 5.97s
3 0.11589 0.07747 1.49599 0.97984 6.28s
4 0.08812 0.07019 1.25546 0.98234 6.33s
5 0.07151 0.06582 1.08641 0.98267 6.25s
6 0.06319 0.06136 1.02982 0.98601 6.25s
7 0.05554 0.05917 0.93871 0.98567 5.84s
8 0.04899 0.06015 0.81444 0.98651 6.28s
9 0.04474 0.05213 0.85813 0.98867 6.29s
10 0.03853 0.05597 0.68836 0.98709 6.26s
Start to test.....
The accuracy of this network is: 0.99
[1] https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
[2] https://github.com/craffel/Lasagne-tutorial/blob/master/examples/mnist.py
[3] http://nbviewer.jupyter.org/github/dnouri/nolearn/blob/master/docs/notebooks/CNN_tutorial.ipynb