Chapter 5: Convolutional Neural Networks (CNNs)
Practical Exercises Chapter 5
Exercise 1: Implementing a Basic CNN for Image Classification
Task: Implement a simple CNN from scratch to classify images from the MNIST dataset. Train the model for a few epochs and evaluate its accuracy.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1x28x28 inputs such as MNIST.

    Each 3x3 convolution is followed by ReLU and 2x2 max pooling, so a
    28x28 image shrinks 28 -> 26 -> 13 -> 11 -> 5 before two fully
    connected layers produce 10 class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input batch *x*."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 5 * 5), this can never fold samples together when the
        # input resolution is wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Define transformations and load the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 rescales pixel values from [0, 1] to [-1, 1]
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for 5 epochs
epochs = 5
model.train()  # Make training mode explicit
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we implemented a simple CNN to classify the MNIST dataset, trained the model using the Adam optimizer, and printed the average training loss after each epoch. Evaluation is left as an extension: load the MNIST test set (train=False) and compute the classification accuracy.
Exercise 2: Fine-Tuning a Pretrained ResNet for CIFAR-10
Task: Fine-tune a pretrained ResNet-18 model on the CIFAR-10 dataset by replacing the final fully connected layer with a layer that outputs 10 classes. Train the model and evaluate its accuracy on the test set.
Solution:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.optim as optim
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize(224),  # Upsample the 32x32 CIFAR-10 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the pretrained weights
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Load pretrained ResNet-18 model and modify the final layer.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we loaded a pretrained ResNet-18 model and modified its final fully connected layer to fit the CIFAR-10 dataset (10 classes). After training for a few epochs, the model can be evaluated on the test set.
Exercise 3: Object Detection Using Faster R-CNN
Task: Use a pretrained Faster R-CNN model to detect objects in an image. Load the model, preprocess the input image, and print the detected objects and their bounding boxes.
Solution:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as transforms
# Load a pretrained Faster R-CNN model.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)
model.eval()  # Set model to evaluation mode

# Load and preprocess the image. Converting to RGB guarantees three channels
# even when the file is grayscale, palette-based, or has an alpha channel.
image = Image.open("test_image.jpg").convert("RGB")
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform object detection
with torch.no_grad():  # Inference only, so skip gradient tracking
    predictions = model(image_tensor)

# Print the predicted bounding boxes and labels
print(predictions)
In this exercise:
- We loaded a pretrained Faster R-CNN model to perform object detection on a given image.
- The raw predictions are printed: for each input image, a dictionary containing the predicted bounding boxes, class labels, and confidence scores. You can visualize these boxes on the image for a better understanding of the predictions.
Exercise 4: Implementing Inception Module in a Custom CNN
Task: Implement an Inception module from scratch and integrate it into a custom CNN. Train this model on a dataset such as CIFAR-10.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the Inception module
class InceptionModule(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    The branches are a 1x1 convolution, a 1x1 -> 3x3 convolution, a
    1x1 -> 5x5 convolution, and a 3x3 max pool -> 1x1 convolution. Padding
    and stride keep the spatial size unchanged, so the output has
    64 + 128 + 32 + 32 = 256 channels at the input resolution.

    Every convolution is followed by ReLU. Without it, two stacked
    convolutions collapse into a single linear map and the extra layer adds
    no representational power.

    Args:
        in_channels: Number of channels in the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 64, kernel_size=1)
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
        )
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run all four branches on *x* and concatenate them on dim 1."""
        branch1x1 = torch.relu(self.branch1x1(x))
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        outputs = [branch1x1, branch3x3, branch5x5, branch_pool]
        return torch.cat(outputs, 1)
# Define the custom CNN using the Inception module
class CustomCNN(nn.Module):
    """Minimal classifier: one Inception module, global pooling, linear head.

    Args:
        in_channels: Channels of the input images (3 for RGB, the default).
        num_classes: Number of output logits (10 by default, e.g. CIFAR-10).
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.inception1 = InceptionModule(in_channels=in_channels)
        # The four Inception branches emit 64 + 128 + 32 + 32 channels in total
        self.fc = nn.Linear(64 + 128 + 32 + 32, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images *x*."""
        x = self.inception1(x)
        x = torch.mean(x, dim=[2, 3])  # Global average pooling
        return self.fc(x)
# Define the data transformations and load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = CustomCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()  # Make training mode explicit
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")
In this exercise, we implemented a custom Inception module and integrated it into a CNN. The model is trained on the CIFAR-10 dataset using the Adam optimizer.
These practical exercises give hands-on experience in key CNN tasks, including building simple CNNs, fine-tuning pretrained models, performing object detection, and implementing advanced modules like Inception. By completing these exercises, you’ll be able to apply CNNs to a wide range of real-world applications.
Practical Exercises Chapter 5
Exercise 1: Implementing a Basic CNN for Image Classification
Task: Implement a simple CNN from scratch to classify images from the MNIST dataset. Train the model for a few epochs and evaluate its accuracy.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1x28x28 inputs such as MNIST.

    Each 3x3 convolution is followed by ReLU and 2x2 max pooling, so a
    28x28 image shrinks 28 -> 26 -> 13 -> 11 -> 5 before two fully
    connected layers produce 10 class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input batch *x*."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 5 * 5), this can never fold samples together when the
        # input resolution is wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Define transformations and load the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 rescales pixel values from [0, 1] to [-1, 1]
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for 5 epochs
epochs = 5
model.train()  # Make training mode explicit
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we implemented a simple CNN to classify the MNIST dataset, trained the model using the Adam optimizer, and printed the average training loss after each epoch. Evaluation is left as an extension: load the MNIST test set (train=False) and compute the classification accuracy.
Exercise 2: Fine-Tuning a Pretrained ResNet for CIFAR-10
Task: Fine-tune a pretrained ResNet-18 model on the CIFAR-10 dataset by replacing the final fully connected layer with a layer that outputs 10 classes. Train the model and evaluate its accuracy on the test set.
Solution:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.optim as optim
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize(224),  # Upsample the 32x32 CIFAR-10 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the pretrained weights
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Load pretrained ResNet-18 model and modify the final layer.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we loaded a pretrained ResNet-18 model and modified its final fully connected layer to fit the CIFAR-10 dataset (10 classes). After training for a few epochs, the model can be evaluated on the test set.
Exercise 3: Object Detection Using Faster R-CNN
Task: Use a pretrained Faster R-CNN model to detect objects in an image. Load the model, preprocess the input image, and print the detected objects and their bounding boxes.
Solution:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as transforms
# Load a pretrained Faster R-CNN model.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)
model.eval()  # Set model to evaluation mode

# Load and preprocess the image. Converting to RGB guarantees three channels
# even when the file is grayscale, palette-based, or has an alpha channel.
image = Image.open("test_image.jpg").convert("RGB")
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform object detection
with torch.no_grad():  # Inference only, so skip gradient tracking
    predictions = model(image_tensor)

# Print the predicted bounding boxes and labels
print(predictions)
In this exercise:
- We loaded a pretrained Faster R-CNN model to perform object detection on a given image.
- The raw predictions are printed: for each input image, a dictionary containing the predicted bounding boxes, class labels, and confidence scores. You can visualize these boxes on the image for a better understanding of the predictions.
Exercise 4: Implementing Inception Module in a Custom CNN
Task: Implement an Inception module from scratch and integrate it into a custom CNN. Train this model on a dataset such as CIFAR-10.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the Inception module
class InceptionModule(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    The branches are a 1x1 convolution, a 1x1 -> 3x3 convolution, a
    1x1 -> 5x5 convolution, and a 3x3 max pool -> 1x1 convolution. Padding
    and stride keep the spatial size unchanged, so the output has
    64 + 128 + 32 + 32 = 256 channels at the input resolution.

    Every convolution is followed by ReLU. Without it, two stacked
    convolutions collapse into a single linear map and the extra layer adds
    no representational power.

    Args:
        in_channels: Number of channels in the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 64, kernel_size=1)
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
        )
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run all four branches on *x* and concatenate them on dim 1."""
        branch1x1 = torch.relu(self.branch1x1(x))
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        outputs = [branch1x1, branch3x3, branch5x5, branch_pool]
        return torch.cat(outputs, 1)
# Define the custom CNN using the Inception module
class CustomCNN(nn.Module):
    """Minimal classifier: one Inception module, global pooling, linear head.

    Args:
        in_channels: Channels of the input images (3 for RGB, the default).
        num_classes: Number of output logits (10 by default, e.g. CIFAR-10).
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.inception1 = InceptionModule(in_channels=in_channels)
        # The four Inception branches emit 64 + 128 + 32 + 32 channels in total
        self.fc = nn.Linear(64 + 128 + 32 + 32, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images *x*."""
        x = self.inception1(x)
        x = torch.mean(x, dim=[2, 3])  # Global average pooling
        return self.fc(x)
# Define the data transformations and load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = CustomCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()  # Make training mode explicit
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")
In this exercise, we implemented a custom Inception module and integrated it into a CNN. The model is trained on the CIFAR-10 dataset using the Adam optimizer.
These practical exercises give hands-on experience in key CNN tasks, including building simple CNNs, fine-tuning pretrained models, performing object detection, and implementing advanced modules like Inception. By completing these exercises, you’ll be able to apply CNNs to a wide range of real-world applications.
Practical Exercises Chapter 5
Exercise 1: Implementing a Basic CNN for Image Classification
Task: Implement a simple CNN from scratch to classify images from the MNIST dataset. Train the model for a few epochs and evaluate its accuracy.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1x28x28 inputs such as MNIST.

    Each 3x3 convolution is followed by ReLU and 2x2 max pooling, so a
    28x28 image shrinks 28 -> 26 -> 13 -> 11 -> 5 before two fully
    connected layers produce 10 class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input batch *x*."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 5 * 5), this can never fold samples together when the
        # input resolution is wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Define transformations and load the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 rescales pixel values from [0, 1] to [-1, 1]
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for 5 epochs
epochs = 5
model.train()  # Make training mode explicit
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we implemented a simple CNN to classify the MNIST dataset, trained the model using the Adam optimizer, and printed the average training loss after each epoch. Evaluation is left as an extension: load the MNIST test set (train=False) and compute the classification accuracy.
Exercise 2: Fine-Tuning a Pretrained ResNet for CIFAR-10
Task: Fine-tune a pretrained ResNet-18 model on the CIFAR-10 dataset by replacing the final fully connected layer with a layer that outputs 10 classes. Train the model and evaluate its accuracy on the test set.
Solution:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.optim as optim
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize(224),  # Upsample the 32x32 CIFAR-10 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the pretrained weights
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Load pretrained ResNet-18 model and modify the final layer.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we loaded a pretrained ResNet-18 model and modified its final fully connected layer to fit the CIFAR-10 dataset (10 classes). After training for a few epochs, the model can be evaluated on the test set.
Exercise 3: Object Detection Using Faster R-CNN
Task: Use a pretrained Faster R-CNN model to detect objects in an image. Load the model, preprocess the input image, and print the detected objects and their bounding boxes.
Solution:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as transforms
# Load a pretrained Faster R-CNN model.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)
model.eval()  # Set model to evaluation mode

# Load and preprocess the image. Converting to RGB guarantees three channels
# even when the file is grayscale, palette-based, or has an alpha channel.
image = Image.open("test_image.jpg").convert("RGB")
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform object detection
with torch.no_grad():  # Inference only, so skip gradient tracking
    predictions = model(image_tensor)

# Print the predicted bounding boxes and labels
print(predictions)
In this exercise:
- We loaded a pretrained Faster R-CNN model to perform object detection on a given image.
- The raw predictions are printed: for each input image, a dictionary containing the predicted bounding boxes, class labels, and confidence scores. You can visualize these boxes on the image for a better understanding of the predictions.
Exercise 4: Implementing Inception Module in a Custom CNN
Task: Implement an Inception module from scratch and integrate it into a custom CNN. Train this model on a dataset such as CIFAR-10.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the Inception module
class InceptionModule(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    The branches are a 1x1 convolution, a 1x1 -> 3x3 convolution, a
    1x1 -> 5x5 convolution, and a 3x3 max pool -> 1x1 convolution. Padding
    and stride keep the spatial size unchanged, so the output has
    64 + 128 + 32 + 32 = 256 channels at the input resolution.

    Every convolution is followed by ReLU. Without it, two stacked
    convolutions collapse into a single linear map and the extra layer adds
    no representational power.

    Args:
        in_channels: Number of channels in the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 64, kernel_size=1)
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
        )
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run all four branches on *x* and concatenate them on dim 1."""
        branch1x1 = torch.relu(self.branch1x1(x))
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        outputs = [branch1x1, branch3x3, branch5x5, branch_pool]
        return torch.cat(outputs, 1)
# Define the custom CNN using the Inception module
class CustomCNN(nn.Module):
    """Minimal classifier: one Inception module, global pooling, linear head.

    Args:
        in_channels: Channels of the input images (3 for RGB, the default).
        num_classes: Number of output logits (10 by default, e.g. CIFAR-10).
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.inception1 = InceptionModule(in_channels=in_channels)
        # The four Inception branches emit 64 + 128 + 32 + 32 channels in total
        self.fc = nn.Linear(64 + 128 + 32 + 32, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images *x*."""
        x = self.inception1(x)
        x = torch.mean(x, dim=[2, 3])  # Global average pooling
        return self.fc(x)
# Define the data transformations and load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = CustomCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()  # Make training mode explicit
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")
In this exercise, we implemented a custom Inception module and integrated it into a CNN. The model is trained on the CIFAR-10 dataset using the Adam optimizer.
These practical exercises give hands-on experience in key CNN tasks, including building simple CNNs, fine-tuning pretrained models, performing object detection, and implementing advanced modules like Inception. By completing these exercises, you’ll be able to apply CNNs to a wide range of real-world applications.
Practical Exercises Chapter 5
Exercise 1: Implementing a Basic CNN for Image Classification
Task: Implement a simple CNN from scratch to classify images from the MNIST dataset. Train the model for a few epochs and evaluate its accuracy.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1x28x28 inputs such as MNIST.

    Each 3x3 convolution is followed by ReLU and 2x2 max pooling, so a
    28x28 image shrinks 28 -> 26 -> 13 -> 11 -> 5 before two fully
    connected layers produce 10 class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input batch *x*."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 5 * 5), this can never fold samples together when the
        # input resolution is wrong; fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Define transformations and load the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 rescales pixel values from [0, 1] to [-1, 1]
    transforms.Normalize((0.5,), (0.5,)),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for 5 epochs
epochs = 5
model.train()  # Make training mode explicit
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we implemented a simple CNN to classify the MNIST dataset, trained the model using the Adam optimizer, and printed the average training loss after each epoch. Evaluation is left as an extension: load the MNIST test set (train=False) and compute the classification accuracy.
Exercise 2: Fine-Tuning a Pretrained ResNet for CIFAR-10
Task: Fine-tune a pretrained ResNet-18 model on the CIFAR-10 dataset by replacing the final fully connected layer with a layer that outputs 10 classes. Train the model and evaluate its accuracy on the test set.
Solution:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.optim as optim
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize(224),  # Upsample the 32x32 CIFAR-10 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the pretrained weights
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Load pretrained ResNet-18 model and modify the final layer.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

# Evaluate the model (Optional: Load test set and compute accuracy)
In this exercise, we loaded a pretrained ResNet-18 model and modified its final fully connected layer to fit the CIFAR-10 dataset (10 classes). After training for a few epochs, the model can be evaluated on the test set.
Exercise 3: Object Detection Using Faster R-CNN
Task: Use a pretrained Faster R-CNN model to detect objects in an image. Load the model, preprocess the input image, and print the detected objects and their bounding boxes.
Solution:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as transforms
# Load a pretrained Faster R-CNN model.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)
model.eval()  # Set model to evaluation mode

# Load and preprocess the image. Converting to RGB guarantees three channels
# even when the file is grayscale, palette-based, or has an alpha channel.
image = Image.open("test_image.jpg").convert("RGB")
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform object detection
with torch.no_grad():  # Inference only, so skip gradient tracking
    predictions = model(image_tensor)

# Print the predicted bounding boxes and labels
print(predictions)
In this exercise:
- We loaded a pretrained Faster R-CNN model to perform object detection on a given image.
- The raw predictions are printed: for each input image, a dictionary containing the predicted bounding boxes, class labels, and confidence scores. You can visualize these boxes on the image for a better understanding of the predictions.
Exercise 4: Implementing Inception Module in a Custom CNN
Task: Implement an Inception module from scratch and integrate it into a custom CNN. Train this model on a dataset such as CIFAR-10.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define the Inception module
class InceptionModule(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    The branches are a 1x1 convolution, a 1x1 -> 3x3 convolution, a
    1x1 -> 5x5 convolution, and a 3x3 max pool -> 1x1 convolution. Padding
    and stride keep the spatial size unchanged, so the output has
    64 + 128 + 32 + 32 = 256 channels at the input resolution.

    Every convolution is followed by ReLU. Without it, two stacked
    convolutions collapse into a single linear map and the extra layer adds
    no representational power.

    Args:
        in_channels: Number of channels in the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 64, kernel_size=1)
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 128, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
        )
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run all four branches on *x* and concatenate them on dim 1."""
        branch1x1 = torch.relu(self.branch1x1(x))
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        outputs = [branch1x1, branch3x3, branch5x5, branch_pool]
        return torch.cat(outputs, 1)
# Define the custom CNN using the Inception module
class CustomCNN(nn.Module):
    """Minimal classifier: one Inception module, global pooling, linear head.

    Args:
        in_channels: Channels of the input images (3 for RGB, the default).
        num_classes: Number of output logits (10 by default, e.g. CIFAR-10).
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.inception1 = InceptionModule(in_channels=in_channels)
        # The four Inception branches emit 64 + 128 + 32 + 32 channels in total
        self.fc = nn.Linear(64 + 128 + 32 + 32, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images *x*."""
        x = self.inception1(x)
        x = torch.mean(x, dim=[2, 3])  # Global average pooling
        return self.fc(x)
# Define the data transformations and load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Instantiate the model, define the loss function and optimizer
model = CustomCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
model.train()  # Make training mode explicit
for epoch in range(5):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss once per epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")
In this exercise, we implemented a custom Inception module and integrated it into a CNN. The model is trained on the CIFAR-10 dataset using the Adam optimizer.
These practical exercises give hands-on experience in key CNN tasks, including building simple CNNs, fine-tuning pretrained models, performing object detection, and implementing advanced modules like Inception. By completing these exercises, you’ll be able to apply CNNs to a wide range of real-world applications.