# %matplotlib inline  # Jupyter magic — invalid in a plain .py file, keep commented
import torch
import torch.nn as nn
import numpy as np
import sys

# Make the course-provided helper package importable.
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
def dropout(X, drop_prob):
    """Apply inverted dropout to tensor X.

    Each element is zeroed with probability ``drop_prob``; surviving
    elements are scaled by 1 / (1 - drop_prob) so the output's expected
    value equals the input (no rescaling is needed at test time).

    Args:
        X: input tensor of any shape; converted to float.
        drop_prob: probability in [0, 1] of dropping each element.

    Returns:
        A new float tensor with the same shape as X.

    Raises:
        ValueError: if drop_prob is outside [0, 1].
    """
    X = X.float()
    if not 0 <= drop_prob <= 1:
        # `assert` is stripped under `python -O`; validate explicitly.
        raise ValueError(f"drop_prob must be in [0, 1], got {drop_prob}")
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Everything is dropped; avoid the division by zero below.
        return torch.zeros_like(X)
    # Sample the Bernoulli mask on X's device — the original always
    # sampled on CPU, which fails for GPU inputs.
    mask = (torch.rand(X.shape, device=X.device) < keep_prob).float()
    return mask * X / keep_prob
# Smoke-test the dropout helper on a small 2x8 integer tensor:
# keep everything (0), drop about half (0.5), drop everything (1.0).
X = torch.arange(16).view(2, 8)
for prob in (0, 0.5, 1.0):
    dropout(X, prob)
# Model dimensions: 28x28 Fashion-MNIST images flattened to 784 inputs,
# two hidden layers of 256 units, 10 output classes.
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256


def _init_weight(n_in, n_out):
    # Small Gaussian init via NumPy; one call per matrix, in the same
    # order as the original, so the RNG stream is consumed identically.
    return torch.tensor(
        np.random.normal(0, 0.01, size=(n_in, n_out)),
        dtype=torch.float,
        requires_grad=True,
    )


W1 = _init_weight(num_inputs, num_hiddens1)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = _init_weight(num_hiddens1, num_hiddens2)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = _init_weight(num_hiddens2, num_outputs)
b3 = torch.zeros(num_outputs, requires_grad=True)

# All trainable parameters, in the order the optimizer expects.
params = [W1, b1, W2, b2, W3, b3]

# Dropout rates for hidden layer 1 and hidden layer 2.
drop_prob1, drop_prob2 = 0.2, 0.5
def net(X, is_training=True):
    """Three-layer MLP forward pass with dropout after each hidden layer.

    Args:
        X: batch of inputs; flattened to (batch, num_inputs).
        is_training: when True, apply dropout to both hidden activations;
            pass False at evaluation time.

    Returns:
        Raw output logits of shape (batch, num_outputs).
    """
    flat = X.view(-1, num_inputs)
    hidden1 = (torch.matmul(flat, W1) + b1).relu()
    if is_training:
        hidden1 = dropout(hidden1, drop_prob1)
    hidden2 = (torch.matmul(hidden1, W2) + b2).relu()
    if is_training:
        hidden2 = dropout(hidden2, drop_prob2)
    return torch.matmul(hidden2, W3) + b3
def evaluate_accuracy(data_iter, net):
    """Compute classification accuracy of `net` over `data_iter`.

    Supports both nn.Module models (switched to eval mode for the
    forward pass, then restored to train mode) and plain functions
    (called with is_training=False when they accept that keyword).

    Args:
        data_iter: iterable of (X, y) batches.
        net: an nn.Module or a callable mapping X -> logits.

    Returns:
        Fraction of examples whose argmax prediction equals the label.
    """
    acc_sum, n = 0.0, 0
    # Gradients are never needed during evaluation; disabling autograd
    # saves memory and time (the original tracked them unnecessarily).
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # disable dropout / batch-norm updates
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                net.train()  # restore training mode
            else:
                # Custom function: pass is_training=False if supported.
                if 'is_training' in net.__code__.co_varnames:
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
# Training hyperparameters.
num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()

# Fashion-MNIST loaders from the course helper package.
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size, root='/home/kesci/input/FashionMNIST2065')

# Train with the d2l helper (plain SGD over `params` at rate `lr`).
d2l.train_ch3(net, train_iter, test_iter, loss,
              num_epochs, batch_size, params, lr)
# Sample output from a previous run (pasted notebook output; commented
# out so the file remains valid Python):
# epoch 1, loss 0.0046, train acc 0.549, test acc 0.704
# epoch 2, loss 0.0023, train acc 0.785, test acc 0.737
# epoch 3, loss 0.0019, train acc 0.825, test acc 0.834
# epoch 4, loss 0.0017, train acc 0.842, test acc 0.763
# epoch 5, loss 0.0016, train acc 0.848, test acc 0.813