雖然Lasagne自帶的examples中帶有MNIST的例子,但是這個例子的風格更接近用Theano來寫。使用nolearn.lasagne中的NeuralNet類來寫MLP似乎更符合Lasagne的設計初衷,也更像Caffe的風格。
code
沒有什麼比代碼更清楚的了:
# Using lasagne to fit mnist.
# show how to use NeuralNet
# Tarrega, 150611.
import lasagne
from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet
import os
from urllib import urlretrieve
import gzip
import pickle
import numpy
# Remote source and local cache filename of the gzipped MNIST pickle
# (a ((X_train, y_train), (X_valid, y_valid), (X_test, y_test)) tuple).
DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
DATA_FILENAME = 'mnist.pkl.gz'
def pickle_load(f, encoding):
    """Unpickle the object stored in file object *f*.

    On Python 3, ``encoding`` (here 'latin-1') is required to decode the
    Python-2-era MNIST pickle; Python 2's ``pickle.load`` does not accept
    the keyword, so fall back to a plain load there.  The original body
    dropped ``encoding`` entirely, which breaks loading under Python 3.
    """
    try:
        return pickle.load(f, encoding=encoding)
    except TypeError:
        # Python 2: pickle.load() takes no ``encoding`` argument.
        return pickle.load(f)
def _load_data(url=DATA_URL, filename=DATA_FILENAME):
    """Load data from `url` and store the result in `filename`.

    Downloads the gzipped MNIST pickle once, caches it on disk, and
    returns the unpickled (train, valid, test) tuple.
    """
    # NOTE: original used a Python 2 `print` statement and misspelled the
    # message ("minist datatset"); both fixed here.
    print('filename for the MNIST dataset: %s' % filename)
    if not os.path.exists(filename):
        print("Downloading MNIST dataset")
        urlretrieve(url, filename)
    # 'latin-1' is needed to decode the Python-2-era pickle on Python 3.
    with gzip.open(filename, 'rb') as f:
        return pickle_load(f, encoding='latin-1')
def load():
    """Return the MNIST training split as (X_train, y_train).

    The pickle also contains validation and test splits, but they are
    discarded here: nolearn's NeuralNet performs its own train/eval
    split internally.
    """
    data = _load_data()
    X_train, y_train = data[0]
    # NeuralNet (classification mode) requires int32 class labels.
    y_train = numpy.asarray(y_train, dtype='int32')
    # Original used a Python 2 `print` statement; use the function form,
    # valid on both Python 2 and 3.
    print('size: %s %s %s %s' %
          (X_train.shape, y_train.shape, X_train.dtype, y_train.dtype))
    return X_train, y_train
# Single-hidden-layer MLP for MNIST, declared Caffe-style via nolearn's
# NeuralNet wrapper around Lasagne layers.
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, 28*28),  # 28x28 input pixels per batch
    hidden_num_units=200,  # number of units in hidden layer
    output_nonlinearity=lasagne.nonlinearities.softmax,  # output layer
    output_num_units=10,  # 10 target values (digit classes 0-9)
    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    regression=False,  # False: this is classification, not regression
    max_epochs=400,  # we want to train this many epochs
    verbose=1,
    )
X, y = load()
net1.fit(X, y)
output
運行結果:
epoch train loss valid loss train/val valid acc dur
------- ------------ ------------ ----------- ----------- -----
1 0.57989 0.32038 1.80998 0.91009 2.60s
2 0.30194 0.26267 1.14949 0.92692 2.64s
3 0.25044 0.22917 1.09281 0.93482 2.63s
4 0.21578 0.20542 1.05046 0.94105 2.83s
5 0.18969 0.18748 1.01177 0.94560 2.73s
6 0.16917 0.17336 0.97585 0.94876 2.73s
7 0.15259 0.16223 0.94059 0.95232 2.80s
8 0.13882 0.15313 0.90655 0.95509 2.79s
9 0.12723 0.14541 0.87498 0.95776 2.71s
10 0.11725 0.13897 0.84373 0.96043 2.65s
11 0.10859 0.13341 0.81392 0.96152 2.74s
12 0.10100 0.12864 0.78511 0.96271 2.65s
13 0.09424 0.12444 0.75735 0.96389 2.65s
14 0.08822 0.12075 0.73056 0.96508 2.75s
15 0.08280 0.11754 0.70446 0.96567 2.66s
16 0.07790 0.11458 0.67988 0.96647 2.55s
17 0.07343 0.11192 0.65606 0.96676 2.68s
18 0.06936 0.10953 0.63324 0.96785 2.69s
19 0.06561 0.10741 0.61082 0.96874 2.61s
20 0.06218 0.10538 0.59000 0.96914 2.60s
21 0.05899 0.10357 0.56959 0.96943 2.64s
22 0.05604 0.10197 0.54953 0.96973 2.59s
23 0.05329 0.10048 0.53033 0.97022 2.61s
24 0.05070 0.09921 0.51105 0.97052 2.71s
25 0.04829 0.09795 0.49296 0.97072 2.68s
Questions:
- 爲什麼運行速度比Lasagne自帶的Example快這麼多?NeuralNet類預設會自動把訓練資料分爲80%的Train和20%的Validation(即上表中valid loss、valid acc對應的部分),實際參與梯度更新的樣本變少,計算量也就小了。