Menu iconMenu iconGenerative Deep Learning Updated Edition
Generative Deep Learning Updated Edition

Chapter 5: Exploring Variational Autoencoders (VAEs)

5.8 Practical Exercises - Chapter 5: Exploring Variational Autoencoders (VAEs)

This section provides practical exercises to reinforce your understanding of Variational Autoencoders (VAEs) and their variations. Each exercise includes a problem statement and a solution with code examples where applicable.

Exercise 1: Implement a Basic VAE

Problem Statement: Implement a basic Variational Autoencoder (VAE) using TensorFlow and Keras. Train the VAE on the MNIST dataset and visualize the reconstructed images.

Solution:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Sampling layer using the reparameterization trick
class Sampling(Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Encoder network
def build_encoder(input_shape, latent_dim):
    inputs = Input(shape=input_shape)
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z], name='encoder')

# Decoder network
def build_decoder(latent_dim, output_shape):
    latent_inputs = Input

```python
(shape=(latent_dim,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model(latent_inputs, outputs, name='decoder')

# Define the input shape and latent dimension
input_shape = (784,)
latent_dim = 2

# Build the encoder and decoder
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(latent_dim, input_shape[0])

# Define the VAE model
inputs = Input(shape=input_shape)
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae')

# Define the VAE loss function
def vae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the VAE model
vae.compile(optimizer='adam', loss=lambda x, y: vae_loss(x, y, z_mean, z_log_var))

# Train the VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Reconstruct images
def reconstruct_images(vae, x_test, n_samples=10):
    reconstructed_images = vae.predict(x_test[:n_samples])
    original_images = x_test[:n_samples].reshape((n_samples, 28, 28))
    reconstructed_images = reconstructed_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 4))
    for i in range(n_samples):
        plt.subplot(2, n_samples, i + 1)
        plt.imshow(original_images[i], cmap='gray')
        plt.axis('off')
        plt.subplot(2, n_samples, n_samples + i + 1)
        plt.imshow(reconstructed_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Visualize the reconstructed images
reconstruct_images(vae, x_test)

Exercise 2: Implement a Beta-VAE

Problem Statement: Implement a Beta-VAE by modifying the basic VAE to include a beta hyperparameter. Train the Beta-VAE on the MNIST dataset and visualize the latent space.

Solution:

# Define the Beta-VAE loss function
def beta_vae_loss(inputs, outputs, z_mean, z_log_var, beta=4.0):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + beta * kl_loss)

# Compile the Beta-VAE model
vae.compile(optimizer='adam', loss=lambda x, y: beta_vae_loss(x, y, z_mean, z_log_var, beta=4.0))

# Train the Beta-VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Visualize the latent space
def plot_latent_space(encoder, x_test, y_test, n_samples=10000):
    z_mean, _, _ = encoder.predict(x_test)
    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(z_mean[:n_samples, 0], z_mean[:n_samples, 1], c=y_test[:n_samples], cmap='viridis')
    plt.colorbar(scatter)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.title('Latent Space')
    plt.show()

# Plot the latent space
plot_latent_space(encoder, x_test, _)

Exercise 3: Implement a Conditional VAE (CVAE)

Problem Statement: Implement a Conditional VAE (CVAE) that conditions on the digit labels from the MNIST dataset. Train the CVAE and generate images conditioned on specific labels.

Solution:

from tensorflow.keras.layers import Concatenate

# Encoder network for CVAE
def build_cvae_encoder(input_shape, num_classes, latent_dim):
    inputs = Input(shape=input_shape)
    labels = Input(shape=(num_classes,))
    x = Dense(512, activation='relu')(inputs)
    x = Concatenate()([x, labels])
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model([inputs, labels], [z_mean, z_log_var, z], name='cvae_encoder')

# Decoder network for CVAE
def build_cvae_decoder(latent_dim, num_classes, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    labels = Input(shape=(num_classes,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Concatenate()([x, labels])
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model([latent_inputs, labels], outputs, name='cvae_decoder')

# Define the input shape, number of classes, and latent dimension
input_shape = (784,)
num_classes = 10
latent_dim = 2

# Build the encoder and decoder for CVAE
cvae_encoder = build_cvae_encoder(input_shape, num_classes, latent_dim)
cvae_decoder = build_cvae_decoder(latent_dim, num_classes, input_shape[0])

# Define the Conditional VAE model
inputs = Input(shape=input_shape)
labels = Input(shape=(num_classes,))
z_mean, z_log_var, z = cvae_encoder([inputs, labels])
outputs = cvae_decoder([z, labels])
cvae = Model([inputs, labels], outputs, name='cvae')

# Define the CVAE loss function
def cvae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the CVAE model
cvae.compile(optimizer='adam', loss=lambda x, y: cvae_loss(x, y, z_mean, z_log_var))

# Prepare the labels for training
y_train = tf.keras.utils.to_categorical(_, num_classes)
y_test = tf.keras.utils.to_categorical(_, num_classes)

# Train the CVAE model
cvae.fit([x_train, y_train], x_train, epochs=50, batch_size=128, validation_data=([x_test, y_test], x_test))

# Generate images conditioned on specific labels
def generate_conditioned_images(cvae_decoder, label, latent_dim, num_classes, n_samples=10):
    label_vector = tf.keras.utils.to_categorical([label] * n_samples, num_classes)
    random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
    generated_images = cvae_decoder.predict([random_latent_vectors, label_vector])
    generated_images = generated_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        plt.imshow(generated_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Generate and visualize images conditioned on the digit '5'
generate_conditioned_images(cvae_decoder, 5, latent_dim, num_classes)

These practical exercises provide hands-on experience with Variational Autoencoders (VAEs) and their variations. By implementing a basic VAE, Beta-VAE, and Conditional VAE, you can deepen your understanding of these models and their applications.

These exercises cover essential aspects of VAEs, including image generation, reconstruction, visualization of latent spaces, and conditional generation, helping you to apply these techniques to various tasks in generative modeling.

5.8 Practical Exercises - Chapter 5: Exploring Variational Autoencoders (VAEs)

This section provides practical exercises to reinforce your understanding of Variational Autoencoders (VAEs) and their variations. Each exercise includes a problem statement and a solution with code examples where applicable.

Exercise 1: Implement a Basic VAE

Problem Statement: Implement a basic Variational Autoencoder (VAE) using TensorFlow and Keras. Train the VAE on the MNIST dataset and visualize the reconstructed images.

Solution:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Sampling layer using the reparameterization trick
class Sampling(Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Encoder network
def build_encoder(input_shape, latent_dim):
    inputs = Input(shape=input_shape)
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z], name='encoder')

# Decoder network
def build_decoder(latent_dim, output_shape):
    latent_inputs = Input

```python
(shape=(latent_dim,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model(latent_inputs, outputs, name='decoder')

# Define the input shape and latent dimension
input_shape = (784,)
latent_dim = 2

# Build the encoder and decoder
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(latent_dim, input_shape[0])

# Define the VAE model
inputs = Input(shape=input_shape)
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae')

# Define the VAE loss function
def vae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the VAE model
vae.compile(optimizer='adam', loss=lambda x, y: vae_loss(x, y, z_mean, z_log_var))

# Train the VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Reconstruct images
def reconstruct_images(vae, x_test, n_samples=10):
    reconstructed_images = vae.predict(x_test[:n_samples])
    original_images = x_test[:n_samples].reshape((n_samples, 28, 28))
    reconstructed_images = reconstructed_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 4))
    for i in range(n_samples):
        plt.subplot(2, n_samples, i + 1)
        plt.imshow(original_images[i], cmap='gray')
        plt.axis('off')
        plt.subplot(2, n_samples, n_samples + i + 1)
        plt.imshow(reconstructed_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Visualize the reconstructed images
reconstruct_images(vae, x_test)

Exercise 2: Implement a Beta-VAE

Problem Statement: Implement a Beta-VAE by modifying the basic VAE to include a beta hyperparameter. Train the Beta-VAE on the MNIST dataset and visualize the latent space.

Solution:

# Define the Beta-VAE loss function
def beta_vae_loss(inputs, outputs, z_mean, z_log_var, beta=4.0):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + beta * kl_loss)

# Compile the Beta-VAE model
vae.compile(optimizer='adam', loss=lambda x, y: beta_vae_loss(x, y, z_mean, z_log_var, beta=4.0))

# Train the Beta-VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Visualize the latent space
def plot_latent_space(encoder, x_test, y_test, n_samples=10000):
    z_mean, _, _ = encoder.predict(x_test)
    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(z_mean[:n_samples, 0], z_mean[:n_samples, 1], c=y_test[:n_samples], cmap='viridis')
    plt.colorbar(scatter)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.title('Latent Space')
    plt.show()

# Plot the latent space
plot_latent_space(encoder, x_test, _)

Exercise 3: Implement a Conditional VAE (CVAE)

Problem Statement: Implement a Conditional VAE (CVAE) that conditions on the digit labels from the MNIST dataset. Train the CVAE and generate images conditioned on specific labels.

Solution:

from tensorflow.keras.layers import Concatenate

# Encoder network for CVAE
def build_cvae_encoder(input_shape, num_classes, latent_dim):
    inputs = Input(shape=input_shape)
    labels = Input(shape=(num_classes,))
    x = Dense(512, activation='relu')(inputs)
    x = Concatenate()([x, labels])
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model([inputs, labels], [z_mean, z_log_var, z], name='cvae_encoder')

# Decoder network for CVAE
def build_cvae_decoder(latent_dim, num_classes, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    labels = Input(shape=(num_classes,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Concatenate()([x, labels])
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model([latent_inputs, labels], outputs, name='cvae_decoder')

# Define the input shape, number of classes, and latent dimension
input_shape = (784,)
num_classes = 10
latent_dim = 2

# Build the encoder and decoder for CVAE
cvae_encoder = build_cvae_encoder(input_shape, num_classes, latent_dim)
cvae_decoder = build_cvae_decoder(latent_dim, num_classes, input_shape[0])

# Define the Conditional VAE model
inputs = Input(shape=input_shape)
labels = Input(shape=(num_classes,))
z_mean, z_log_var, z = cvae_encoder([inputs, labels])
outputs = cvae_decoder([z, labels])
cvae = Model([inputs, labels], outputs, name='cvae')

# Define the CVAE loss function
def cvae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the CVAE model
cvae.compile(optimizer='adam', loss=lambda x, y: cvae_loss(x, y, z_mean, z_log_var))

# Prepare the labels for training
y_train = tf.keras.utils.to_categorical(_, num_classes)
y_test = tf.keras.utils.to_categorical(_, num_classes)

# Train the CVAE model
cvae.fit([x_train, y_train], x_train, epochs=50, batch_size=128, validation_data=([x_test, y_test], x_test))

# Generate images conditioned on specific labels
def generate_conditioned_images(cvae_decoder, label, latent_dim, num_classes, n_samples=10):
    label_vector = tf.keras.utils.to_categorical([label] * n_samples, num_classes)
    random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
    generated_images = cvae_decoder.predict([random_latent_vectors, label_vector])
    generated_images = generated_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        plt.imshow(generated_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Generate and visualize images conditioned on the digit '5'
generate_conditioned_images(cvae_decoder, 5, latent_dim, num_classes)

These practical exercises provide hands-on experience with Variational Autoencoders (VAEs) and their variations. By implementing a basic VAE, Beta-VAE, and Conditional VAE, you can deepen your understanding of these models and their applications.

These exercises cover essential aspects of VAEs, including image generation, reconstruction, visualization of latent spaces, and conditional generation, helping you to apply these techniques to various tasks in generative modeling.

5.8 Practical Exercises - Chapter 5: Exploring Variational Autoencoders (VAEs)

This section provides practical exercises to reinforce your understanding of Variational Autoencoders (VAEs) and their variations. Each exercise includes a problem statement and a solution with code examples where applicable.

Exercise 1: Implement a Basic VAE

Problem Statement: Implement a basic Variational Autoencoder (VAE) using TensorFlow and Keras. Train the VAE on the MNIST dataset and visualize the reconstructed images.

Solution:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Sampling layer using the reparameterization trick
class Sampling(Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Encoder network
def build_encoder(input_shape, latent_dim):
    inputs = Input(shape=input_shape)
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z], name='encoder')

# Decoder network
def build_decoder(latent_dim, output_shape):
    latent_inputs = Input

```python
(shape=(latent_dim,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model(latent_inputs, outputs, name='decoder')

# Define the input shape and latent dimension
input_shape = (784,)
latent_dim = 2

# Build the encoder and decoder
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(latent_dim, input_shape[0])

# Define the VAE model
inputs = Input(shape=input_shape)
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae')

# Define the VAE loss function
def vae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the VAE model
vae.compile(optimizer='adam', loss=lambda x, y: vae_loss(x, y, z_mean, z_log_var))

# Train the VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Reconstruct images
def reconstruct_images(vae, x_test, n_samples=10):
    reconstructed_images = vae.predict(x_test[:n_samples])
    original_images = x_test[:n_samples].reshape((n_samples, 28, 28))
    reconstructed_images = reconstructed_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 4))
    for i in range(n_samples):
        plt.subplot(2, n_samples, i + 1)
        plt.imshow(original_images[i], cmap='gray')
        plt.axis('off')
        plt.subplot(2, n_samples, n_samples + i + 1)
        plt.imshow(reconstructed_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Visualize the reconstructed images
reconstruct_images(vae, x_test)

Exercise 2: Implement a Beta-VAE

Problem Statement: Implement a Beta-VAE by modifying the basic VAE to include a beta hyperparameter. Train the Beta-VAE on the MNIST dataset and visualize the latent space.

Solution:

# Define the Beta-VAE loss function
def beta_vae_loss(inputs, outputs, z_mean, z_log_var, beta=4.0):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + beta * kl_loss)

# Compile the Beta-VAE model
vae.compile(optimizer='adam', loss=lambda x, y: beta_vae_loss(x, y, z_mean, z_log_var, beta=4.0))

# Train the Beta-VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Visualize the latent space
def plot_latent_space(encoder, x_test, y_test, n_samples=10000):
    z_mean, _, _ = encoder.predict(x_test)
    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(z_mean[:n_samples, 0], z_mean[:n_samples, 1], c=y_test[:n_samples], cmap='viridis')
    plt.colorbar(scatter)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.title('Latent Space')
    plt.show()

# Plot the latent space
plot_latent_space(encoder, x_test, _)

Exercise 3: Implement a Conditional VAE (CVAE)

Problem Statement: Implement a Conditional VAE (CVAE) that conditions on the digit labels from the MNIST dataset. Train the CVAE and generate images conditioned on specific labels.

Solution:

from tensorflow.keras.layers import Concatenate

# Encoder network for CVAE
def build_cvae_encoder(input_shape, num_classes, latent_dim):
    inputs = Input(shape=input_shape)
    labels = Input(shape=(num_classes,))
    x = Dense(512, activation='relu')(inputs)
    x = Concatenate()([x, labels])
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model([inputs, labels], [z_mean, z_log_var, z], name='cvae_encoder')

# Decoder network for CVAE
def build_cvae_decoder(latent_dim, num_classes, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    labels = Input(shape=(num_classes,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Concatenate()([x, labels])
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model([latent_inputs, labels], outputs, name='cvae_decoder')

# Define the input shape, number of classes, and latent dimension
input_shape = (784,)
num_classes = 10
latent_dim = 2

# Build the encoder and decoder for CVAE
cvae_encoder = build_cvae_encoder(input_shape, num_classes, latent_dim)
cvae_decoder = build_cvae_decoder(latent_dim, num_classes, input_shape[0])

# Define the Conditional VAE model
inputs = Input(shape=input_shape)
labels = Input(shape=(num_classes,))
z_mean, z_log_var, z = cvae_encoder([inputs, labels])
outputs = cvae_decoder([z, labels])
cvae = Model([inputs, labels], outputs, name='cvae')

# Define the CVAE loss function
def cvae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the CVAE model
cvae.compile(optimizer='adam', loss=lambda x, y: cvae_loss(x, y, z_mean, z_log_var))

# Prepare the labels for training
y_train = tf.keras.utils.to_categorical(_, num_classes)
y_test = tf.keras.utils.to_categorical(_, num_classes)

# Train the CVAE model
cvae.fit([x_train, y_train], x_train, epochs=50, batch_size=128, validation_data=([x_test, y_test], x_test))

# Generate images conditioned on specific labels
def generate_conditioned_images(cvae_decoder, label, latent_dim, num_classes, n_samples=10):
    label_vector = tf.keras.utils.to_categorical([label] * n_samples, num_classes)
    random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
    generated_images = cvae_decoder.predict([random_latent_vectors, label_vector])
    generated_images = generated_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        plt.imshow(generated_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Generate and visualize images conditioned on the digit '5'
generate_conditioned_images(cvae_decoder, 5, latent_dim, num_classes)

These practical exercises provide hands-on experience with Variational Autoencoders (VAEs) and their variations. By implementing a basic VAE, Beta-VAE, and Conditional VAE, you can deepen your understanding of these models and their applications.

These exercises cover essential aspects of VAEs, including image generation, reconstruction, visualization of latent spaces, and conditional generation, helping you to apply these techniques to various tasks in generative modeling.

5.8 Practical Exercises - Chapter 5: Exploring Variational Autoencoders (VAEs)

This section provides practical exercises to reinforce your understanding of Variational Autoencoders (VAEs) and their variations. Each exercise includes a problem statement and a solution with code examples where applicable.

Exercise 1: Implement a Basic VAE

Problem Statement: Implement a basic Variational Autoencoder (VAE) using TensorFlow and Keras. Train the VAE on the MNIST dataset and visualize the reconstructed images.

Solution:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Sampling layer using the reparameterization trick
class Sampling(Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Encoder network
def build_encoder(input_shape, latent_dim):
    inputs = Input(shape=input_shape)
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z], name='encoder')

# Decoder network
def build_decoder(latent_dim, output_shape):
    latent_inputs = Input

```python
(shape=(latent_dim,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model(latent_inputs, outputs, name='decoder')

# Define the input shape and latent dimension
input_shape = (784,)
latent_dim = 2

# Build the encoder and decoder
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(latent_dim, input_shape[0])

# Define the VAE model
inputs = Input(shape=input_shape)
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae')

# Define the VAE loss function
def vae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the VAE model
vae.compile(optimizer='adam', loss=lambda x, y: vae_loss(x, y, z_mean, z_log_var))

# Train the VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Reconstruct images
def reconstruct_images(vae, x_test, n_samples=10):
    reconstructed_images = vae.predict(x_test[:n_samples])
    original_images = x_test[:n_samples].reshape((n_samples, 28, 28))
    reconstructed_images = reconstructed_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 4))
    for i in range(n_samples):
        plt.subplot(2, n_samples, i + 1)
        plt.imshow(original_images[i], cmap='gray')
        plt.axis('off')
        plt.subplot(2, n_samples, n_samples + i + 1)
        plt.imshow(reconstructed_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Visualize the reconstructed images
reconstruct_images(vae, x_test)

Exercise 2: Implement a Beta-VAE

Problem Statement: Implement a Beta-VAE by modifying the basic VAE to include a beta hyperparameter. Train the Beta-VAE on the MNIST dataset and visualize the latent space.

Solution:

# Define the Beta-VAE loss function
def beta_vae_loss(inputs, outputs, z_mean, z_log_var, beta=4.0):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + beta * kl_loss)

# Compile the Beta-VAE model
vae.compile(optimizer='adam', loss=lambda x, y: beta_vae_loss(x, y, z_mean, z_log_var, beta=4.0))

# Train the Beta-VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Visualize the latent space
def plot_latent_space(encoder, x_test, y_test, n_samples=10000):
    z_mean, _, _ = encoder.predict(x_test)
    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(z_mean[:n_samples, 0], z_mean[:n_samples, 1], c=y_test[:n_samples], cmap='viridis')
    plt.colorbar(scatter)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.title('Latent Space')
    plt.show()

# Plot the latent space
plot_latent_space(encoder, x_test, _)

Exercise 3: Implement a Conditional VAE (CVAE)

Problem Statement: Implement a Conditional VAE (CVAE) that conditions on the digit labels from the MNIST dataset. Train the CVAE and generate images conditioned on specific labels.

Solution:

from tensorflow.keras.layers import Concatenate

# Encoder network for CVAE
def build_cvae_encoder(input_shape, num_classes, latent_dim):
    inputs = Input(shape=input_shape)
    labels = Input(shape=(num_classes,))
    x = Dense(512, activation='relu')(inputs)
    x = Concatenate()([x, labels])
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model([inputs, labels], [z_mean, z_log_var, z], name='cvae_encoder')

# Decoder network for CVAE
def build_cvae_decoder(latent_dim, num_classes, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    labels = Input(shape=(num_classes,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Concatenate()([x, labels])
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model([latent_inputs, labels], outputs, name='cvae_decoder')

# Define the input shape, number of classes, and latent dimension
input_shape = (784,)
num_classes = 10
latent_dim = 2

# Build the encoder and decoder for CVAE
cvae_encoder = build_cvae_encoder(input_shape, num_classes, latent_dim)
cvae_decoder = build_cvae_decoder(latent_dim, num_classes, input_shape[0])

# Define the Conditional VAE model
inputs = Input(shape=input_shape)
labels = Input(shape=(num_classes,))
z_mean, z_log_var, z = cvae_encoder([inputs, labels])
outputs = cvae_decoder([z, labels])
cvae = Model([inputs, labels], outputs, name='cvae')

# Define the CVAE loss function
def cvae_loss(inputs, outputs, z_mean, z_log_var):
    reconstruction_loss = tf.keras.losses.binary_crossentropy(inputs, outputs)
    reconstruction_loss *= input_shape[0]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    return K.mean(reconstruction_loss + kl_loss)

# Compile the CVAE model
cvae.compile(optimizer='adam', loss=lambda x, y: cvae_loss(x, y, z_mean, z_log_var))

# Prepare the labels for training
y_train = tf.keras.utils.to_categorical(_, num_classes)
y_test = tf.keras.utils.to_categorical(_, num_classes)

# Train the CVAE model
cvae.fit([x_train, y_train], x_train, epochs=50, batch_size=128, validation_data=([x_test, y_test], x_test))

# Generate images conditioned on specific labels
def generate_conditioned_images(cvae_decoder, label, latent_dim, num_classes, n_samples=10):
    label_vector = tf.keras.utils.to_categorical([label] * n_samples, num_classes)
    random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
    generated_images = cvae_decoder.predict([random_latent_vectors, label_vector])
    generated_images = generated_images.reshape((n_samples, 28, 28))

    plt.figure(figsize=(10, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        plt.imshow(generated_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Generate and visualize images conditioned on the digit '5'
generate_conditioned_images(cvae_decoder, 5, latent_dim, num_classes)

These practical exercises provide hands-on experience with Variational Autoencoders (VAEs) and their variations. By implementing a basic VAE, Beta-VAE, and Conditional VAE, you can deepen your understanding of these models and their applications.

These exercises cover essential aspects of VAEs, including image generation, reconstruction, visualization of latent spaces, and conditional generation, helping you to apply these techniques to various tasks in generative modeling.