
Welcome to Assignment 3 on Computer Vision.

This assignment consists of two parts. Part-1 is based on the content you learned in the Deep
Learning lectures. You need to complete a few topics (weight initialization in neural networks,
batch normalization) as homework before solving questions 4, 5, and 6. Feel free to head out to the
forums for details. Part-2 is un-graded and mainly designed to help you flex the Deep Learning
muscles grown in Part-1.

Unlike the first part, you'll have to implement everything from scratch in Part-2. If you find
answers to questions in Part-2, feel free to head out to the forums and discuss them with your
classmates!

Instructions
1. Use Python 3.x to run this notebook
2. Write your code only in between the lines 'YOUR CODE STARTS HERE' and 'YOUR CODE
ENDS HERE'. You should not change anything else in the code cells; if you do, the answers you
are supposed to get at the end of this assignment might be wrong.
3. Read documentation of each function carefully.
4. All the Best!
# Imports
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image

# Uncomment the line below if you're running this notebook on your local PC
# %matplotlib inline

# DO NOT CHANGE THIS CODE
np.random.seed(10)

Part-1
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F
import timeit
import unittest

## Please DO NOT remove these lines.
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)

Data Loading and Pre-processing


# check availability of GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# define a set of transforms for preparing the dataset
transform = transforms.Compose([
    transforms.ToTensor(),                       # convert the image to a pytorch tensor
    transforms.Normalize((0.1307,), (0.3081,))   # normalise the images with the mean and std of the dataset
])

# Load the MNIST training and test datasets using `torchvision.datasets.MNIST`
#### YOUR CODE STARTS HERE ####
train_dataset = datasets.MNIST('./data', train=True, download=True,
                               transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True,
                              transform=transform)
#### YOUR CODE ENDS HERE ####

# create dataloaders for the training and test datasets
# use a batch size of 32 and set shuffle=True for the training set
#### YOUR CODE STARTS HERE ####
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=32, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=32)
#### YOUR CODE ENDS HERE ####
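
As a quick, optional check (not part of the graded cells), you can pull one batch from the training dataloader and confirm the tensor shapes; the expected shapes below assume the batch size of 32 set above.

# Optional sanity check: inspect one batch from the training dataloader.
images, labels = next(iter(train_dataloader))
print(images.shape)   # expected: torch.Size([32, 1, 28, 28])
print(labels.shape)   # expected: torch.Size([32])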

Network Definition
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        #### YOUR CODE STARTS HERE ####
        # define a conv layer with output channels as 16, kernel size of 3 and stride of 1

        # define a conv layer with output channels as 32, kernel size of 3 and stride of 1

        # define a conv layer with output channels as 64, kernel size of 3 and stride of 1

        # define a max pooling layer with kernel size 2

        # define a dropout layer with a probability of 0.25 (may use nn.Dropout2d)

        # define a dropout layer with a probability of 0.5

        # define a linear (dense) layer with 128 output features

        # define a linear (dense) layer with output features corresponding to the number of classes in the dataset

        #### YOUR CODE ENDS HERE ####

    def forward(self, x):
        # Use the layers defined above in a sequential way (follow the same order as the
        # layer definitions above) and write the forward pass; after each of conv1, conv2,
        # conv3 and fc1 use a relu activation.
        #### YOUR CODE STARTS HERE ####

        #### YOUR CODE ENDS HERE ####

        output = F.log_softmax(x, dim=1)
        return output
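
For reference, here is one layer configuration that is consistent with the parameter-count check in the sanity tests below (1,015,946 trainable parameters for 28x28 single-channel MNIST inputs). Treat it as a sketch of a plausible answer rather than the only valid one; the class name NetReference is ours, and it reuses the nn, F and torch imports from the cell above.

class NetReference(nn.Module):
    # Hypothetical reference implementation; layer sizes chosen to match the
    # 1,015,946-parameter sanity check for 28x28 single-channel MNIST inputs.
    def __init__(self):
        super(NetReference, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, 1)      # 28x28 -> 26x26
        self.conv2 = nn.Conv2d(16, 32, 3, 1)     # 26x26 -> 24x24
        self.conv3 = nn.Conv2d(32, 64, 3, 1)     # 24x24 -> 22x22
        self.pool = nn.MaxPool2d(2)              # 22x22 -> 11x11
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(64 * 11 * 11, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# sum(p.numel() for p in NetReference().parameters()) == 1015946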

Sanity Check
Make sure all the tests below pass without any errors before you proceed with the training part.

import unittest

class TestImplementations(unittest.TestCase):

    # Dataloading tests
    def test_dataset(self):
        self.dataset_classes = ['0 - zero',
                                '1 - one',
                                '2 - two',
                                '3 - three',
                                '4 - four',
                                '5 - five',
                                '6 - six',
                                '7 - seven',
                                '8 - eight',
                                '9 - nine']
        self.assertTrue(train_dataset.classes == self.dataset_classes)
        self.assertTrue(train_dataset.train == True)

    def test_dataloader(self):
        self.assertTrue(train_dataloader.batch_size == 32)
        self.assertTrue(test_dataloader.batch_size == 32)

    def test_total_parameters(self):
        model = Net().to(device)
        self.assertTrue(sum(p.numel() for p in model.parameters()) == 1015946)

suite = unittest.TestLoader().loadTestsFromModule(TestImplementations())
unittest.TextTestRunner().run(suite)

...
----------------------------------------------------------------------
Ran 3 tests in 8.293s

OK

<unittest.runner.TextTestResult run=3 errors=0 failures=0>

Training and Inference


def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        #### YOUR CODE STARTS HERE ####
        # send the image, target to the device
        data, target = data.to(device), target.to(device)
        # flush out the gradients stored in the optimizer
        optimizer.zero_grad()
        # pass the image to the model and assign the output to a variable named output
        output = model(data)
        # calculate the loss (use nll_loss in pytorch)
        loss = F.nll_loss(output, target)
        # do a backward pass
        loss.backward()
        # update the weights
        optimizer.step()
        #### YOUR CODE ENDS HERE ####
        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            #### YOUR CODE STARTS HERE ####
            # send the image, target to the device
            data, target = data.to(device), target.to(device)
            # pass the image to the model and assign the output to a variable named output
            output = model(data)
            #### YOUR CODE ENDS HERE ####

            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

Question 1
Run the code cell below and report the final test accuracy. (If you are not getting the exact
number shown in the options, please report the closest number.)

1. 64%
2. 79%
3. 97%
4. 90%
model = Net().to(device)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 11):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))
Question 2
Modify the network to replace the ReLU activations with Sigmoid and report the final test accuracy
by running the cell below. (If you are not getting the exact number shown in the options, please
report the closest number.)

1. 95%
2. 54%
3. 20%
4. 9%
class NetSigmoid(nn.Module):
    def __init__(self):
        super(NetSigmoid, self).__init__()
        #### YOUR CODE STARTS HERE ####
        # define a conv layer with output channels as 16, kernel size of 3 and stride of 1

        # define a conv layer with output channels as 32, kernel size of 3 and stride of 1

        # define a conv layer with output channels as 64, kernel size of 3 and stride of 1

        # define a dropout layer with a probability of 0.25

        # define a dropout layer with a probability of 0.5

        # define a linear (dense) layer with 128 output features

        # define a linear (dense) layer with output features corresponding to the number of classes in the dataset

        #### YOUR CODE ENDS HERE ####

    def forward(self, x):
        x = self.conv1(x)
        x = torch.sigmoid(x)
        x = self.conv2(x)
        x = torch.sigmoid(x)
        x = self.conv3(x)
        x = torch.sigmoid(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = torch.sigmoid(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

model = NetSigmoid().to(device)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 11):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))

Question 3
Train the network defined in Question-1 with the Adagrad optimizer, using the same learning rate
mentioned in that question. Report the final test accuracy by running the cell below. (If you are
not getting the exact number shown in the options, please report the closest number.)

1. 80%
2. 99%
3. 92%
4. 85%
model = Net().to(device)

## Define Adagrad Optimiser with a learning rate of 0.01
optimizer = optim.Adagrad(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 11):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))

Question 4
Initialize the Conv2d layers in the network defined in Question-1 (Net) with all ones (both
weights and bias). Train the network with the Adam optimizer and report the final test accuracy by
running the cell below. (If you are not getting the exact number shown in the options, please report
the closest number.)

1. 11%
2. 18%
3. 97%
4. 6%
model = Net().to(device)

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.ones_(m.weight.data)
        torch.nn.init.ones_(m.bias.data)

model.apply(init_weights)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 5):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))

Question 5
Initialize the network defined in Question-1 (Net) with Xavier initialization
(torch.nn.init.xavier_normal_); for the bias, use zeros. Train the network with the Adam optimizer and
report the final test accuracy by running the cell below. (If you are not getting the exact number
shown in the options, please report the closest number.)

1. 88%
2. 74%
3. 97%
4. 80%
model = Net().to(device)

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_normal_(m.weight.data)
        torch.nn.init.zeros_(m.bias.data)

model.apply(init_weights)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 5):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))

Question 6
Add three batch-norm layers to the network defined in Question-1 and report the final test
accuracy by running the cell below.

1. 92%
2. 89%
3. 98%
4. 74%
class NetBatchNorm(nn.Module):
    def __init__(self):
        super(NetBatchNorm, self).__init__()
        #### YOUR CODE STARTS HERE ####
        # define a conv layer with output channels as 16, kernel size of 3 and stride of 1

        # define a batchnorm layer (2d) with 16 features

        # define a conv layer with output channels as 32, kernel size of 3 and stride of 1

        # define a batchnorm layer (2d) with 32 features

        # define a conv layer with output channels as 64, kernel size of 3 and stride of 1

        # define a batchnorm layer (2d) with 64 features

        # define a dropout layer with a probability of 0.25

        # define a dropout layer with a probability of 0.5

        # define a linear (dense) layer with 128 output features

        # define a linear (dense) layer with output features corresponding to the number of classes in the dataset

        #### YOUR CODE ENDS HERE ####

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
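
For reference, a BatchNorm2d layer with C features adds 2*C learnable parameters (weight and bias), so bn1/bn2/bn3 with 16, 32 and 64 features add 2 x (16 + 32 + 64) = 224 parameters on top of the 1,015,946 from Net, matching the 1,016,170 check in the next cell. Below is one complete sketch consistent with the forward pass above; it assumes the same layer sizes as the earlier Net sketch, and the class name NetBatchNormReference is ours.

class NetBatchNormReference(nn.Module):
    # Hypothetical reference implementation matching the 1,016,170-parameter check.
    def __init__(self):
        super(NetBatchNormReference, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, 1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(64 * 11 * 11, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):                        # same ordering as the forward pass above
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# sum(p.numel() for p in NetBatchNormReference().parameters()) == 1016170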

# If you implemented the batchnorm layers correctly this cell should return True
sum(p.numel() for p in NetBatchNorm().parameters()) == 1016170

True

model = NetBatchNorm().to(device)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

start = timeit.default_timer()
for epoch in range(1, 11):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)))

Part-2
This section is un-graded and purely for practice.

The main focus of this part is to help you flex the deep learning muscles built in the part above. You
should build a network on the SVHN dataset. This dataset is similar to MNIST but, unlike MNIST,
the images are colored and more complex.

As of writing this, the state-of-the-art (SoTA) performance on this dataset is 98.98%. You can start
with the simple network we defined above for the MNIST dataset (with some modifications for
dealing with the different-sized, colored images, unlike MNIST). But to achieve SoTA performance
you need to do a lot of hackery. Here is a list of a few things we would encourage you to try (a
minimal starting-point sketch follows the list):
• Use data augmentation wisely. Read and understand how to perform the
augmentations listed below.
– RandomFlips, Color Jittering
– Cutout, Cutmix
– Mixup
– Auto-augment
• Try to increase the image size using standard image interpolation techniques. Try
using tricks like progressive resizing of images and see if it helps.

• After a certain number of layers, adding more layers might not be of much help; run
experiments on SVHN and see if you observe this.

• To understand the difficulties in training deeper networks, read this paper: Deep
Residual Learning for Image Recognition.

• To improve the performance on SVHN, try using architectures like ResNet,
DenseNet or EfficientNet. Most of these architectures are available by default in
PyTorch.
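
As a starting point, the sketch below (not part of the assignment, and only one of many reasonable setups) loads SVHN with torchvision.datasets.SVHN, applies a couple of the simpler augmentations mentioned above, and trains a torchvision ResNet-18 with its output layer set to 10 classes. The normalization statistics, learning rate, batch size and epoch count are illustrative placeholders, not tuned values.

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Simple augmentation for training; the normalization values are approximate SVHN statistics.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)),
])

train_set = datasets.SVHN('./data', split='train', download=True, transform=train_transform)
test_set = datasets.SVHN('./data', split='test', download=True, transform=test_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=128)

# A torchvision ResNet-18 adapted to 10 classes; for 32x32 inputs you may also want to
# shrink the first conv / drop the initial max-pool, but this runs as-is.
model = models.resnet18(num_classes=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(5):                       # placeholder number of epochs
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

    # quick evaluation after each epoch
    model.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            correct += (model(images).argmax(dim=1) == labels).sum().item()
    print('epoch {}: test accuracy {:.2f}%'.format(epoch, 100.0 * correct / len(test_set)))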
