In [None]:
# This notebook provides another PyTorch example in which
# we will train a neural network on the MNIST dataset.

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time

# If you are facing problems importing torchvision,
# you may need to upgrade to a later version.
from torchvision import datasets, transforms

# Select the compute device: flip use_cuda to True to run on the GPU,
# leave it False to run on the CPU.
use_cuda = False
device_name = "cuda" if use_cuda else "cpu"
device = torch.device(device_name)

# Number of samples per mini-batch.
batch_size = 64

In [None]:
# Fix the random seeds of NumPy and PyTorch so that runs are more
# reproducible. Bit-for-bit reproducibility is still not guaranteed, see
# https://pytorch.org/docs/stable/notes/randomness.html
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Define the neural networks. They extend nn.Module and typically stack
# different standard PyTorch layers and modules (e.g. nn.Linear, nn.Conv2d,
# nn.Dropout2d, nn.BatchNorm2d, etc.) and functions (e.g. F.relu) together.

# The forward(...) method defines the computation of each module (and is typically
# overridden). The arguments of forward(...) (there might be multiple of them)
# represent the module inputs. The returned values of forward(...) (they can be
# more than one as well) represent the module outputs.

class Net(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer.

    With the default arguments this is the 784 -> 200 -> 10 network for
    28x28 images and 10 classes.

    Args:
        in_features: Number of values per flattened input sample.
        hidden_features: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_features=200, num_classes=10):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the seeded weight initialisation.
        self.fc = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch of inputs.

        The input is flattened to rows of ``in_features`` values, so any
        shape whose element count is a multiple of that width is accepted.
        """
        # Take the width from the first layer instead of repeating a literal.
        x = x.view(-1, self.fc.in_features)
        x = F.relu(self.fc(x))
        # No softmax here: the output is returned as raw scores.
        return self.fc2(x)
 
class ConvNet(nn.Module):
    """Six-layer convolutional classifier with batch norm and dropout.

    Each convolution is followed by ReLU and then batch normalisation.
    For 1x28x28 inputs the spatial size shrinks to 1x1 after conv5, so the
    flattened feature vector has 128 entries, matching fc1.
    """

    def __init__(self):
        super().__init__()
        # NOTE: the layers are created in this exact order on purpose. The
        # order fixes both the seeded weight initialisation and the order of
        # model.parameters() / state_dict entries.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(32)
        # Shared dropout module, applied after both pooling steps.
        self.conv2_drop = nn.Dropout2d(p=0.2)
        # Classifier head.
        self.fc1 = nn.Linear(128, 100)
        self.fc2 = nn.Linear(100, 10)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3)
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3)
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=1)
        self.bn6 = nn.BatchNorm2d(128)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch of images."""
        # Stage 1 (28x28 input): 5x5 convs give 24x24 then 20x20, pooled to 10x10.
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(x)))
        x = F.max_pool2d(x, 2)
        x = self.conv2_drop(x)

        # Stage 2: 3x3 convs give 8x8 then 6x6, pooled to 3x3.
        x = self.bn3(F.relu(self.conv3(x)))
        x = self.bn4(F.relu(self.conv4(x)))
        x = self.conv2_drop(F.max_pool2d(x, 2))

        # Stage 3: the 3x3 conv gives 1x1; the 1x1 conv keeps the spatial size.
        x = self.bn5(F.relu(self.conv5(x)))
        x = self.bn6(F.relu(self.conv6(x)))

        # Flatten channels x height x width into one feature vector per sample.
        flat_features = x.size(1) * x.size(2) * x.size(3)
        x = x.view(-1, flat_features)

        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
# Datasets: this example uses MNIST, downloaded into the mnist_data/ folder.
# The train and test splits share one preprocessing pipeline: convert each
# image to a tensor, then normalise it with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST(
    'mnist_data/', train=True, download=True, transform=mnist_transform
)
test_dataset = datasets.MNIST(
    'mnist_data/', train=False, download=True, transform=mnist_transform
)

In [None]:
# Batch iterators used during training and testing. Only the training data
# is shuffled; the test data is read in a fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Create the network and place it on the selected device (GPU or CPU).
# Moving it only has an effect if the model is not already on that device.
model = Net()
model = model.to(device)

# Switch the model to training mode. This only matters for modules that
# behave differently in train and test mode (e.g. Dropout, BatchNorm).
model.train()

In [None]:
# Learning rate of the optimizer.
learning_rate = 0.0001
# Number of passes over the training set.
num_epochs = 20

# Optimizer (SGD vs Adam): Adam is used; swap in the SGD line to compare.
#opt = optim.SGD(params=model.parameters(), lr=learning_rate)
opt = optim.Adam(params=model.parameters(), lr=learning_rate)

# We will use cross entropy loss to train the network.
ce_loss = torch.nn.CrossEntropyLoss()

# Total number of optimizer steps taken so far, across all epochs.
tot_steps = 0

for epoch in range(1, num_epochs + 1):
    t1 = time.time()

    # ---- Training: one pass over the training set. ----
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        tot_steps += 1

        # Zero the gradients; otherwise the gradients from previous steps
        # would be accumulated.
        opt.zero_grad()

        # Predictions of the current model, and the loss between them
        # (out) and the ground truths (y_batch).
        out = model(x_batch)
        batch_loss = ce_loss(out, y_batch)

        # Backpropagate the loss, then take a single optimizer step.
        batch_loss.backward()
        opt.step()

    # ---- Evaluation on the test set after the current epoch. ----
    # In practice this should be done on a validation set; the test set
    # should be evaluated only once the model is trained and fine-tuned.
    # Only accuracy is computed here, but other metrics (e.g. precision,
    # recall) can be obtained too, depending on the task.

    # Evaluate in eval mode so Dropout is disabled and BatchNorm uses (and
    # does not update) its running statistics; restore the mode afterwards.
    was_training = model.training
    model.eval()

    tot_test, tot_acc = 0.0, 0.0
    # No gradients are needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            out = model(x_batch)
            # Index of the highest score along dim 1 is the predicted class.
            pred = torch.max(out, dim=1)[1]
            acc = pred.eq(y_batch).sum().item()
            tot_acc += acc
            # Count actual samples: the last batch may be smaller.
            tot_test += x_batch.size()[0]

    model.train(was_training)
    # The reported time covers both training and evaluation of this epoch.
    t2 = time.time()

    print('Epoch %d: Accuracy %.5lf [%.2lf seconds]' % (epoch, tot_acc/tot_test, t2-t1))