import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T
import numpy as np
import timeit
class ChunkSampler(sampler.Sampler):
    """Sample a contiguous run of dataset indices in ascending order.

    Iterating yields ``start, start + 1, ..., start + num_samples - 1``.

    Arguments:
        num_samples: number of indices to yield (must be >= 0).
        start: first index to yield (must be >= 0).

    Raises:
        ValueError: if ``num_samples`` or ``start`` is negative.
    """

    def __init__(self, num_samples: int, start: int = 0):
        # Reject negative values up front: a negative ``start`` would produce
        # negative indices (which index from the END of a sequence), and a
        # negative ``num_samples`` would only fail later inside ``len()``.
        if num_samples < 0:
            raise ValueError(
                'num_samples must be non-negative, got %r' % (num_samples,))
        if start < 0:
            raise ValueError('start must be non-negative, got %r' % (start,))
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        """Return a fresh iterator over the selected index range."""
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self) -> int:
        """Return how many indices one pass over the sampler yields."""
        return self.num_samples
# Split sizes: indices [0, NUM_TRAIN) of the CIFAR-10 training set are used
# for training and the NUM_VAL indices that follow are held out for validation.
NUM_TRAIN = 49000
NUM_VAL = 1000

# Training loader: walks the first NUM_TRAIN training images in index order.
cifar10_train = dset.CIFAR10(
    root='./cs231n/datasets',
    train=True,
    transform=T.ToTensor(),
    download=True,
)
loader_train = DataLoader(
    dataset=cifar10_train,
    sampler=ChunkSampler(NUM_TRAIN, 0),
    batch_size=64,
)

# Validation loader: same underlying training set, but only the NUM_VAL
# images starting at index NUM_TRAIN.
cifar10_val = dset.CIFAR10(
    root='./cs231n/datasets',
    train=True,
    transform=T.ToTensor(),
    download=True,
)
loader_val = DataLoader(
    dataset=cifar10_val,
    sampler=ChunkSampler(NUM_VAL, NUM_TRAIN),
    batch_size=64,
)

# Test loader: the full CIFAR-10 test split with the default sampler.
cifar10_test = dset.CIFAR10(
    root='./cs231n/datasets',
    train=False,
    transform=T.ToTensor(),
    download=True,
)
loader_test = DataLoader(dataset=cifar10_test, batch_size=64)

# Number of training iterations between successive loss printouts.
print_every = 100
# Small helper for re-initializing a model's parameters from scratch.
def reset(m):
    """Call ``m.reset_parameters()`` when ``m`` provides that method.

    Objects without a ``reset_parameters`` attribute are left untouched.
    Always returns ``None``.
    """
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` becomes ``(N, d1 * d2 * ...)``.
    The previous implementation unpacked exactly four dimensions and used
    ``view``, so it rejected non-4-D inputs, non-contiguous tensors and
    empty batches; ``torch.flatten`` handles all of those while returning
    the same result for the contiguous ``(N, C, H, W)`` case.
    """

    def forward(self, x):
        # start_dim=1 keeps the batch dimension and merges the rest.
        return torch.flatten(x, start_dim=1)
def train(model, loss_fn, optimizer, num_epochs=1, loader=None, log_every=None):
    """Train ``model`` for ``num_epochs`` passes over a batch loader.

    Arguments:
        model: the ``nn.Module`` to optimize; it is switched to training
            mode at the start of every epoch.
        loss_fn: callable taking ``(scores, targets)`` and returning a
            scalar loss tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        num_epochs: number of passes over ``loader``.
        loader: iterable of ``(inputs, targets)`` batches. Defaults to the
            module-level ``loader_train``.
        log_every: print the loss every this many iterations. Defaults to
            the module-level ``print_every``.

    Returns:
        None. Progress is reported with ``print``.
    """
    if loader is None:
        loader = loader_train
    if log_every is None:
        log_every = print_every

    # Batches are moved to wherever the model's parameters live (and inputs
    # cast to their dtype) instead of relying on a global tensor type. A
    # model without parameters leaves the batches where they are.
    ref_param = next(model.parameters(), None)

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(loader):
            if ref_param is not None:
                x = x.to(device=ref_param.device, dtype=ref_param.dtype)
                y = y.to(device=ref_param.device)
            y = y.long()

            scores = model(x)
            loss = loss_fn(scores, y)
            if (t + 1) % log_every == 0:
                # ``loss.item()`` replaces ``loss.data[0]``, which fails on
                # the 0-dim loss tensors current PyTorch returns.
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
def check_accuracy(model, loader):
    """Measure and print the classification accuracy of ``model`` on ``loader``.

    Arguments:
        model: ``nn.Module`` producing per-class scores of shape
            ``(batch, num_classes)``; it is put into eval mode and left there.
        loader: ``DataLoader`` yielding ``(inputs, labels)`` batches. If its
            dataset has a truthy ``train`` attribute the run is reported as
            the validation set, otherwise as the test set.

    Returns:
        The fraction of correctly classified samples as a float in [0, 1],
        or 0.0 when the loader yields no samples.
    """
    if getattr(loader.dataset, 'train', False):
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    num_correct = 0
    num_samples = 0
    model.eval()  # Put the model in test mode (the opposite of model.train(), essentially)

    # Inputs are moved to the model's own device/dtype rather than a global
    # tensor type; a model without parameters leaves them untouched.
    ref_param = next(model.parameters(), None)

    # ``volatile=True`` is ignored by current PyTorch; ``no_grad`` is what
    # actually disables graph construction during evaluation.
    with torch.no_grad():
        for x, y in loader:
            if ref_param is not None:
                x = x.to(device=ref_param.device, dtype=ref_param.dtype)
            scores = model(x)
            _, preds = scores.max(1)
            # Compare on the CPU and accumulate plain Python ints.
            num_correct += (preds.cpu() == y.cpu()).sum().item()
            num_samples += preds.size(0)

    # Guard against an empty loader instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc
# Convolutional classifier for 3x32x32 inputs producing 10 class scores.
# Trailing comments give the activation shape (channels*height*width) after
# the layer on that line.
#
# NOTE(review): the fourth convolution previously used padding=2 while every
# other 3x3 convolution uses padding=1. That produced 18x18 (then 9x9)
# feature maps, contradicting the shapes documented here, and only matched
# the Linear layer because pooling 9x9 floors to 4x4. Padding is now 1 so
# the real shapes agree with the comments.
model = nn.Sequential(
    nn.BatchNorm2d(3),
    nn.Conv2d(3, 64, 3, 1, 1),   # 64*32*32
    nn.ReLU(),
    nn.Conv2d(64, 32, 3, 1, 1),  # 32*32*32
    nn.ReLU(),
    nn.MaxPool2d(2),             # 32*16*16
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 32, 3, 1, 1),  # 32*16*16
    nn.ReLU(),
    nn.Conv2d(32, 16, 3, 1, 1),  # 16*16*16
    nn.ReLU(),
    nn.MaxPool2d(2),             # 16*8*8
    nn.BatchNorm2d(16),
    nn.Conv2d(16, 16, 3, 1, 1),  # 16*8*8
    nn.ReLU(),
    nn.Conv2d(16, 8, 3, 1, 1),   # 8*8*8
    nn.ReLU(),
    nn.MaxPool2d(2),             # 8*4*4
    nn.BatchNorm2d(8),
    Flatten(),                   # 128 features per image
    nn.Linear(8 * 4 * 4, 64),
    nn.ReLU(),
    nn.BatchNorm1d(64),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.BatchNorm1d(32),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.BatchNorm1d(16),
    nn.Linear(16, 10),           # one score per class
)
# Pick the tensor type from the CUDA availability check. The check's result
# used to be discarded and CUDA was used unconditionally, so the script
# crashed on machines without a GPU.
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

# Move the model and loss to the chosen type BEFORE building the optimizer,
# so the optimizer is created over the converted parameters.
model.type(dtype)
loss_fn = nn.CrossEntropyLoss().type(dtype)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

if use_cuda:
    torch.cuda.synchronize()  # Make sure there are no pending GPU computations

train(model, loss_fn, optimizer, num_epochs=10)

check_accuracy(model, loader_val)
if use_cuda:
    torch.cuda.synchronize()

check_accuracy(model, loader_test)
if use_cuda:
    torch.cuda.synchronize()