{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This notebook provides another PyTorch example in which\n",
    "# we will train a neural network on the MNIST dataset.\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "import numpy as np\n",
    "import time\n",
    "\n",
    "# If you are facing problems importing torchvision,\n",
    "# you may need to upgrade to a later version.\n",
    "from torchvision import datasets, transforms\n",
    "\n",
    "# Decide whether to run on GPU or CPU.\n",
    "use_cuda = False\n",
    "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
    "\n",
    "batch_size = 64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# It is a good practice to set the random seeds manually\n",
    "# in order to make the experiments more reproducible,\n",
    "# although completely reproducible results are not guaranteed:\n",
    "# https://pytorch.org/docs/stable/notes/randomness.html\n",
    "np.random.seed(42)\n",
    "torch.manual_seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the neural networks. They extend the nn.Module and typically stack\n",
    "# different PyTorch standard layers and modules (e.g. nn.Linear, nn.Conv2d,\n",
    "# nn.Dropout2d, nn.BatchNorm2d, etc.) and functions (e.g. F.relu) together.\n",
    "\n",
    "# The forward(...) method defines the computation of each module (and is typically\n",
    "# overridden). The arguments of forward(...) (there might be multiple of them)\n",
    "# represent the module inputs. The returned values of forward(...) (they can be\n",
    "# more than one as well) represent the module outputs.\n",
    "\n",
    "class Net(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        self.fc = nn.Linear(28*28, 200)\n",
    "        self.fc2 = nn.Linear(200,10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = x.view((-1, 28*28))\n",
    "        x = F.relu(self.fc(x))\n",
    "        x = self.fc2(x)\n",
    "        return x\n",
    "    \n",
    "class ConvNet(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(ConvNet, self).__init__()\n",
    "        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5, 5))\n",
    "        self.bn1 = nn.BatchNorm2d(32)\n",
    "        self.conv2 = nn.Conv2d(32, 32, kernel_size=(5, 5))\n",
    "        self.bn2 = nn.BatchNorm2d(32)\n",
    "        self.conv2_drop = nn.Dropout2d(p=0.2)\n",
    "        self.fc1 = nn.Linear(128, 100)\n",
    "        self.fc2 = nn.Linear(100, 10)\n",
    "        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3))\n",
    "        self.bn3 = nn.BatchNorm2d(64)\n",
    "        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3, 3))\n",
    "        self.bn4 = nn.BatchNorm2d(64)\n",
    "        self.conv5 = nn.Conv2d(64, 128, kernel_size=(3, 3))\n",
    "        self.bn5 = nn.BatchNorm2d(128)\n",
    "        self.conv6 = nn.Conv2d(128, 128, kernel_size=(1, 1))\n",
    "        self.bn6 = nn.BatchNorm2d(128)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = F.relu(self.conv1(x))\n",
    "        x = self.bn1(x)\n",
    "        x = F.relu(self.conv2(x))\n",
    "        x = self.conv2_drop(F.max_pool2d(self.bn2(x), 2))\n",
    "        x = F.relu(self.conv3(x))\n",
    "        x = self.bn3(x)\n",
    "        x = F.relu(self.conv4(x))\n",
    "        x = self.bn4(x)\n",
    "        x = F.max_pool2d(x, 2)\n",
    "        x = self.conv2_drop(x)\n",
    "        x = F.relu(self.conv5(x))\n",
    "        x = self.bn5(x)\n",
    "        x = F.relu(self.conv6(x))\n",
    "        x = self.bn6(x)\n",
    "        size = x.size()[1] * x.size()[2] * x.size()[3]\n",
    "        # print(size)\n",
    "        x = x.view(-1, size)\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = self.fc2(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Specify the dataset. In this example we will use MNIST.\n",
    "# It will be downloaded in the mnist_data/ folder.\n",
    "\n",
    "train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(\n",
    "    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n",
    "))\n",
    "test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(\n",
    "    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The data loaders will be used during training and testing to provide us with the batches.\n",
    "\n",
    "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n",
    "test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the model to the corresponding device (GPU or CPU).\n",
    "# (This has any effect only if the model is not already there)\n",
    "model = Net().to(device)\n",
    "\n",
    "# Set the model in training mode. This has any effect only on certain modules\n",
    "# (e.g. Dropout, BatchNorm) which behave differently in train and test mode.\n",
    "model.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning rate of the optimizer. \n",
    "learning_rate = 0.0001\n",
    "# How many epochs to do.\n",
    "num_epochs = 20\n",
    "\n",
    "# Optimizers:\n",
    "\"\"\" SGD vs Adam \"\"\"\n",
    "#opt = optim.SGD(params=model.parameters(), lr=learning_rate)\n",
    "opt = optim.Adam(params=model.parameters(), lr=learning_rate)\n",
    "\n",
    "# We will use cross entropy loss to train the network.\n",
    "ce_loss = torch.nn.CrossEntropyLoss()\n",
    "\n",
    "tot_steps = 0\n",
    "\n",
    "for epoch in range(1,num_epochs+1):\n",
    "    t1 = time.time()\n",
    "    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):\n",
    "        x_batch, y_batch = x_batch.to(device), y_batch.to(device)\n",
    "        tot_steps += 1\n",
    "        \n",
    "        # Need to zero the gradients. Otherwise, the gradients from\n",
    "        # previous steps will be accumulated.\n",
    "        opt.zero_grad()\n",
    "        \n",
    "        # Obtain the predictions made by the current model.\n",
    "        out = model(x_batch)\n",
    "\n",
    "        # Compute the loss.\n",
    "        # out - model predictions,\n",
    "        # y_batch - ground truths.\n",
    "        batch_loss = ce_loss(out, y_batch)\n",
    "        \n",
    "        if batch_idx % 100 == 0:\n",
    "            pred = torch.max(out, dim=1)[1]\n",
    "            acc = pred.eq(y_batch).sum().item() / float(batch_size)\n",
    "\n",
    "        # Backpropagate the loss.\n",
    "        batch_loss.backward()\n",
    "        # Make a single step of the optimizer to update the model.\n",
    "        opt.step()\n",
    "    \n",
    "    # Evaluate the model on the test set after the current epoch:\n",
    "    \n",
    "    # In practice, this should be done on the validation set and\n",
    "    # evaluate on the test set only after the model is trained and fine-tuned.\n",
    "    \n",
    "    # Here only compute the accuracy, but other metrics can be obtained, too,\n",
    "    # depending on the task (e.g. precision, recall, etc.).\n",
    "    tot_test, tot_acc = 0.0, 0.0\n",
    "    for batch_idx, (x_batch, y_batch) in enumerate(test_loader):\n",
    "        x_batch, y_batch = x_batch.to(device), y_batch.to(device)\n",
    "        out = model(x_batch)\n",
    "        pred = torch.max(out, dim=1)[1]\n",
    "        acc = pred.eq(y_batch).sum().item()\n",
    "        tot_acc += acc\n",
    "        tot_test += x_batch.size()[0]\n",
    "    t2 = time.time()\n",
    "    \n",
    "    print('Epoch %d: Accuracy %.5lf [%.2lf seconds]' % (epoch, tot_acc/tot_test, t2-t1))           "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}