Chapter 7: Advanced Deep Learning Concepts
Practical Exercises Chapter 7
Exercise 1: Build and Train a Simple Autoencoder
Task: Build and train a simple autoencoder to reconstruct images from the MNIST dataset. Evaluate the quality of the reconstructions by visualizing the original and reconstructed images.
Solution:
import tensorflow as tf
from tensorflow.keras import layers, models
# Load the MNIST dataset
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), 28, 28, 1))
x_test = x_test.reshape((len(x_test), 28, 28, 1))
# Build the autoencoder model
input_img = layers.Input(shape=(28, 28, 1))
# Encoder
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = layers.MaxPooling2D((2, 2), padding='same')(x)
# Decoder
x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = layers.UpSampling2D((2, 2))(x)
decoded = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
# Compile and train the model
autoencoder = models.Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder.fit(x_train, x_train, epochs=10, batch_size=256, validation_data=(x_test, x_test))
# Visualize some reconstructions
import matplotlib.pyplot as plt
decoded_imgs = autoencoder.predict(x_test[:10])
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # Display original images
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.title("Original")
    ax.axis('off')
    # Display reconstructed images
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28), cmap='gray')
    plt.title("Reconstructed")
    ax.axis('off')
plt.show()
In this exercise:
- We built a simple convolutional autoencoder.
- The model was trained to reconstruct MNIST images, learning a compressed representation in the encoder.
- We visualized the original and reconstructed images to judge reconstruction quality.
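Visual inspection can be complemented with a number. The short sketch below (reusing the autoencoder and x_test defined above) reports the per-image mean squared error between inputs and reconstructions; lower is better.
import numpy as np
# Quantitative check: per-image mean squared reconstruction error
reconstructions = autoencoder.predict(x_test)
mse_per_image = np.mean(np.square(x_test - reconstructions), axis=(1, 2, 3))
print(f"Mean test MSE:  {mse_per_image.mean():.5f}")
print(f"Worst test MSE: {mse_per_image.max():.5f}")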
Exercise 2: Implement a Variational Autoencoder (VAE)
Task: Implement a Variational Autoencoder (VAE) and train it on the MNIST dataset. After training, sample from the learned latent space and generate new handwritten digits.
Solution:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
# Sampling function for latent space
def sampling(args):
    # Reparameterization trick: z = mean + std * epsilon
    z_mean, z_log_var = args
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon
# Encoder
latent_dim = 2
inputs = layers.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
# Latent space sampling
z = layers.Lambda(sampling, output_shape=(latent_dim,), name="z")([z_mean, z_log_var])
# Decoder
decoder_input = layers.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(decoder_input)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
# VAE model
encoder = models.Model(inputs, [z_mean, z_log_var, z], name="encoder")
decoder = models.Model(decoder_input, decoder_output, name="decoder")
vae_output = decoder(encoder(inputs)[2])
vae = models.Model(inputs, vae_output, name="vae")
# VAE loss function (reconstruction + KL divergence)
reconstruction_loss = tf.keras.losses.binary_crossentropy(
    tf.keras.backend.flatten(inputs),
    tf.keras.backend.flatten(vae_output)
)
reconstruction_loss *= 28 * 28
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(-0.5 * tf.reduce_sum(kl_loss, axis=-1))
vae_loss = tf.reduce_mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer="adam")
# Train the VAE (reuses x_train and x_test prepared in Exercise 1; because the
# loss is attached via add_loss, no separate target array is passed to fit)
vae.fit(x_train, epochs=10, batch_size=128, validation_data=(x_test, None))
# Generate new images by sampling from the latent space
import matplotlib.pyplot as plt
n = 10  # Grid size: generates an n x n sheet of digits
figure = np.zeros((28 * n, 28 * n))
grid_x = np.linspace(-2, 2, n)
grid_y = np.linspace(-2, 2, n)
for i, yi in enumerate(grid_y):
    for j, xi in enumerate(grid_x):
        z_sample = np.array([[xi, yi]])
        x_decoded = decoder.predict(z_sample)
        digit = x_decoded[0].reshape(28, 28)
        figure[i * 28: (i + 1) * 28, j * 28: (j + 1) * 28] = digit
plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
In this exercise:
- We implemented a VAE to learn a probabilistic latent space of MNIST digits.
- The model was trained to minimize the sum of the reconstruction error and the KL divergence (the negative ELBO).
- After training, we sampled from the learned latent space to generate new images of handwritten digits.
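Because latent_dim is 2, the latent space can also be plotted directly. The sketch below is an optional follow-up: it reloads the MNIST test labels (discarded with _ in Exercise 1) and scatter-plots the encoded test set, colored by digit class, to show how the VAE organizes the digits.
# Load the test labels to color the latent-space scatter plot
(_, _), (_, y_test) = tf.keras.datasets.mnist.load_data()
z_means, _, _ = encoder.predict(x_test)
plt.figure(figsize=(8, 6))
plt.scatter(z_means[:, 0], z_means[:, 1], c=y_test, cmap='tab10', s=2)
plt.colorbar(label='digit class')
plt.xlabel('z[0]')
plt.ylabel('z[1]')
plt.show()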
Exercise 3: Fine-Tune a Pretrained ResNet Model for Image Classification
Task: Fine-tune a ResNet50 model pretrained on ImageNet for a new image classification task. Replace the final layer to match the number of classes in the custom dataset, and train the model on the new dataset.
Solution:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# Load the ResNet50 model pretrained on ImageNet, excluding the top layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False
# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(10, activation='softmax')(x) # Output for 10 classes
# Create new model
model = Model(inputs=base_model.input, outputs=predictions)
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# Example training (assuming new_data and labels are prepared)
# model.fit(new_data, labels, epochs=5, batch_size=32)
print("Model fine-tuned and ready for custom classification task.")
In this exercise:
- We loaded a ResNet50 model pretrained on ImageNet and adapted it for a new image classification task with 10 classes.
- The final layer was replaced to match the number of classes, and the base layers were frozen so the pretrained features are retained while the new head trains; a sketch of an optional second fine-tuning stage follows below.
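Once the new head has converged, a common second stage is to unfreeze the top of the base network and continue training at a much lower learning rate. The sketch below is illustrative: the 30-layer cutoff and epoch count are assumptions to tune for your dataset.
# Stage 2 (optional): unfreeze the top of the base model and fine-tune gently
for layer in base_model.layers[-30:]:
    # Keeping BatchNormalization layers frozen is a common precaution
    if not isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = True
# Recompile with a much smaller learning rate so the pretrained
# weights are nudged rather than overwritten
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.fit(new_data, labels, epochs=3, batch_size=32)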
Exercise 4: Self-Supervised Learning with Contrastive Loss
Task: Implement a self-supervised learning task using the SimCLR framework. Train a model to learn useful image representations using contrastive learning, and evaluate its performance on a downstream classification task.
Solution:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
class SimCLR(nn.Module):
    def __init__(self, base_model, out_dim):
        super(SimCLR, self).__init__()
        self.encoder = base_model
        self.projection = nn.Sequential(
            nn.Linear(base_model.fc.in_features, 512),
            nn.ReLU(),
            nn.Linear(512, out_dim)
        )
        self.encoder.fc = nn.Identity()  # Remove fully connected layer

    def forward(self, x):
        features = self.encoder(x)
        projections = self.projection(features)
        return projections
# Define the contrastive loss (NT-Xent, as used in SimCLR)
def contrastive_loss(z_i, z_j, temperature=0.5):
    batch_size = z_i.size(0)
    # Cosine similarity requires unit-norm embeddings
    z = nn.functional.normalize(torch.cat([z_i, z_j], dim=0), dim=1)
    sim_matrix = torch.mm(z, z.t()) / temperature
    # The positive for sample i is its other view at index i + batch_size,
    # and vice versa; self-similarity on the diagonal is masked out
    labels = torch.cat([torch.arange(batch_size) + batch_size,
                        torch.arange(batch_size)], dim=0).to(z.device)
    mask = torch.eye(sim_matrix.size(0), device=sim_matrix.device).bool()
    sim_matrix = sim_matrix.masked_fill(mask, -float('inf'))
    return nn.CrossEntropyLoss()(sim_matrix, labels)
# Example training loop (assuming the dataset and dataloader are defined)
base_model = models.resnet18(pretrained=True)
simclr_model = SimCLR(base_model, out_dim=128).cuda()
optimizer = optim.Adam(simclr_model.parameters(), lr=0.001)
for epoch in range(10):
    for images, _ in dataloader:
        # Real SimCLR derives each view from an independent random
        # augmentation; identical copies are used here as a placeholder
        view_1, view_2 = images.cuda(), images.cuda()
        z_i = simclr_model(view_1)
        z_j = simclr_model(view_2)
        loss = contrastive_loss(z_i, z_j)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
In this exercise:
- We implemented a SimCLR model using self-supervised contrastive learning.
- The model learned representations by contrasting positive pairs (two augmented views of the same image) against negative pairs (views from other images).
- After pretraining, these learned representations can be used for downstream tasks like classification.
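To carry out the downstream evaluation the task mentions, the standard protocol is linear evaluation: freeze the pretrained encoder and train only a linear classifier on its features. A minimal sketch follows; num_classes and the labeled train_loader are assumptions.
# Linear evaluation sketch: frozen encoder, trainable linear head
num_classes = 10  # assumption: e.g. CIFAR-10
for p in simclr_model.encoder.parameters():
    p.requires_grad = False
classifier = nn.Linear(512, num_classes).cuda()  # 512 = resnet18 feature dim
clf_optimizer = optim.Adam(classifier.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
for images, labels in train_loader:  # assumed labeled dataloader
    images, labels = images.cuda(), labels.cuda()
    with torch.no_grad():
        features = simclr_model.encoder(images)
    loss = criterion(classifier(features), labels)
    clf_optimizer.zero_grad()
    loss.backward()
    clf_optimizer.step()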
These practical exercises cover a wide range of advanced deep learning concepts, including autoencoders, VAEs, transfer learning, and self-supervised learning. By completing these exercises, you’ll gain hands-on experience in building and fine-tuning models, as well as in utilizing unsupervised learning techniques to learn useful representations from unlabeled data.