Chapter 3: Deep Dive into Generative Adversarial Networks (GANs)
3.8 Practical Exercises - Chapter 3: Deep Dive into Generative Adversarial Networks (GANs)
These practical exercises are designed to reinforce the concepts covered in this chapter. By working through these exercises, you will gain hands-on experience with GANs, including their architecture, training, evaluation, and recent innovations.
Exercise 1: Build and Train a Basic GAN
Task: Build and train a basic GAN to generate 28x28 grayscale images similar to the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define the generator model
def build_generator(latent_dim):
    """Build the generator: latent vector -> 28x28x1 image with tanh output.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and fold it into a 7x7 map with 256 channels.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 7x7 -> 14x14 -> 28x28.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Stride-1 output layer: one channel, tanh keeps pixels in [-1, 1].
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net
# Define the discriminator model
def build_discriminator(img_shape):
    """Build the discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(28, 28, 1)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Two stride-2 convolutions halve the spatial size twice.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Flatten the feature map and score it with one sigmoid unit.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net
# Instantiate the GAN
latent_dim = 100
img_shape = (28, 28, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stacked model (noise -> generator -> discriminator). The discriminator is
# flagged non-trainable first, so the generator step is meant to update only
# the generator's weights.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(gan_input, validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # add the channel axis

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# NOTE: each "epoch" below is a single batch update, not a pass over the data.
for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # [loss, accuracy] averaged

    # Train the generator: ask the stacked model to label fakes as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, real_targets)

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()
Exercise 2: Implement and Evaluate a DCGAN
Task: Implement a Deep Convolutional GAN (DCGAN) to generate 64x64 RGB images. Evaluate the model using Inception Score (IS) and Fréchet Inception Distance (FID).
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Build the DCGAN generator: latent vector -> 64x64x3 image with tanh output.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and fold it into an 8x8 map with 256 channels.
    net.add(layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((8, 8, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 8x8 -> 16x16 -> 32x32.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Final stride-2 layer reaches 64x64 with 3 channels; tanh gives [-1, 1].
    net.add(layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return net
# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(64, 64, 3)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stride-2 convolution has no batch normalization.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Two more stride-2 stages, each conv -> batch norm -> LeakyReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net
# Training parameters
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000
batch_size = 64
sample_interval = 1000

# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stacked model (noise -> generator -> discriminator). The discriminator is
# flagged non-trainable first, so the generator step is meant to update only
# the generator's weights.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the dataset (e.g., CIFAR-10)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop (each "epoch" is a single batch update)
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    # CIFAR-10 images are 32x32 but the discriminator expects img_shape
    # (64x64), so upsample each real batch; resizing per batch avoids holding
    # an enlarged copy of the whole dataset in memory.
    real_images = tf.image.resize(x_train[idx], img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # [loss, accuracy] averaged

    # Train the generator: ask the stacked model to label fakes as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, real_targets)

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map tanh output from [-1, 1] back to 8-bit pixel values.
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score (mean and std over splits) of a set of images.

    The score is ``exp(mean_x KL(p(y|x) || p(y)))`` per split, where ``p(y|x)``
    is the class distribution InceptionV3 predicts for image ``x`` and ``p(y)``
    is the mean of those distributions over the split.

    Args:
        images: Batch of RGB images, shape ``(N, H, W, 3)``.
        num_splits: Number of contiguous chunks the predictions are divided into.
        value_range: ``(low, high)`` pixel range of ``images``. Defaults to the
            [-1, 1] range produced by a tanh generator.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is zero

    # The score needs class probabilities, so the classification head must be
    # kept (include_top=True); pooled features are not a distribution.
    model = InceptionV3(include_top=True, weights='imagenet')

    low, high = value_range
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; rescale to that range first.
    images = preprocess_input((images - low) * (255.0 / (high - low)))
    preds = model.predict(images, verbose=0)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: skip rather than average an empty chunk.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (average-pooled features, no
    classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of real RGB images, shape ``(N, H, W, 3)``.
        generated_images: Batch of generated RGB images, shape ``(M, H', W', 3)``.
        value_range: ``(low, high)`` pixel range of both inputs. Defaults to
            the [-1, 1] range produced by a tanh generator.

    Returns:
        The FID value (lower means the two feature distributions are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range
    to_255 = 255.0 / (high - low)

    def _activations(batch):
        """Resize, rescale to [0, 255] for preprocess_input, and embed."""
        batch = tf.image.resize(batch, (299, 299))
        batch = preprocess_input((batch - low) * to_255)
        return model.predict(batch, verbose=0)

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)

    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The covariance product can be singular (e.g. fewer samples than
        # feature dimensions); retry with a small diagonal offset.
        jitter = np.eye(sigma1.shape[0]) * 1e-6
        covmean = sqrtm((sigma1 + jitter).dot(sigma2 + jitter))
    if np.iscomplexobj(covmean):
        # Drop the imaginary part left by numerical error.
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Generate some fake images using the trained GAN generator
# Draw latent vectors and generate the evaluation images
num_eval_samples = 1000
noise = np.random.normal(0, 1, (num_eval_samples, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated set
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID against an equally sized random subset of real images (no repeats)
real_idx = np.random.choice(x_train.shape[0], num_eval_samples, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
Exercise 3: Implement and Train a CycleGAN
Task: Implement and train a CycleGAN to perform image-to-image translation between two domains, such as translating photos to paintings.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator (two downsampling, two upsampling stages).

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image to a 3-channel
        tanh-activated image (the output channel count is fixed at 3).
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    # Encoder: two stride-2 convolutions, each followed by LeakyReLU + batch norm.
    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    # Decoder: one stride-2 transposed convolution, then the tanh output layer.
    features = layers.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(features)
    features = layers.LeakyReLU(alpha=0.2)(features)
    features = layers.BatchNormalization()(features)
    output_img = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)

    return tf.keras.Model(input_img, output_img)
# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    # Two stride-2 convolutions, each followed by LeakyReLU.
    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    # Flatten and score with one sigmoid unit.
    flat = layers.Flatten()(features)
    validity = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(input_img, validity)
# Build CycleGAN models
img_shape = (128, 128, 3)

# One generator per translation direction (A -> B and B -> A).
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)

# One discriminator per domain, each compiled for standalone training.
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)
for domain_discriminator in (D_A, D_B):
    domain_discriminator.compile(
        optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
    )
# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute difference between ``y_true`` and ``y_pred``.

    Used as the cycle-consistency loss: ``y_true`` is the original image and
    ``y_pred`` its reconstruction after a round trip through both generators.
    """
    abs_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(abs_error)
# Full CycleGAN model
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Translate each input to the other domain, then back again.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# The discriminators are flagged non-trainable before building the combined
# model, so the generator step is meant to update only the generators.
D_A.trainable = False
D_B.trainable = False
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Outputs: two adversarial scores plus the two cycle reconstructions.
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])

# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading: replace each `...` with an array of
# images. The code below indexes them like NumPy arrays and feeds them to
# models built for img_shape; presumably they should be scaled to [-1, 1] to
# match the generators' tanh output.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop (each "epoch" is a single batch update)
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_B = G_AB.predict(real_A, verbose=0)
    fake_A = G_BA.predict(real_B, verbose=0)
    dA_loss_real = D_A.train_on_batch(real_A, real_targets)
    dA_loss_fake = D_A.train_on_batch(fake_A, fake_targets)
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
    dB_loss_real = D_B.train_on_batch(real_B, real_targets)
    dB_loss_fake = D_B.train_on_batch(fake_B, fake_targets)
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators: fool both discriminators and reconstruct the inputs
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [real_targets, real_targets, real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display translated images (top row A->B, bottom row B->A)
        fake_B = G_AB.predict(real_A, verbose=0)
        fake_A = G_BA.predict(real_B, verbose=0)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # imshow expects float RGB in [0, 1]; map the tanh output from
            # [-1, 1] so the negative half is not clipped to black.
            axs[0, i].imshow(np.clip((fake_B[i] + 1.0) / 2.0, 0.0, 1.0))
            axs[0, i].axis('off')
            axs[1, i].imshow(np.clip((fake_A[i] + 1.0) / 2.0, 0.0, 1.0))
            axs[1, i].axis('off')
        plt.show()
Exercise 4: Implement a Conditional GAN (cGAN)
Task: Implement a Conditional GAN (cGAN) to generate images conditioned on class labels from the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, label) -> 28x28 tanh image.

    Args:
        latent_dim: Size of the input noise vector (also the embedding width).
        num_classes: Number of distinct class labels.
        img_shape: Target image shape; only its last entry (channel count) is
            used, the spatial size is fixed at 28x28 by the layer stack.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[noise, label]``.
    """
    layers = tf.keras.layers
    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Embed the label to the noise width and condition by element-wise product.
    embedded = layers.Embedding(num_classes, latent_dim)(label)
    label_embedding = layers.Flatten()(embedded)
    model_input = layers.multiply([noise, label_embedding])

    # Project and fold into a 7x7 map with 256 channels.
    x = layers.Dense(256 * 7 * 7, activation="relu")(model_input)
    x = layers.Reshape((7, 7, 256))(x)
    x = layers.BatchNormalization()(x)

    # Two stride-2 transposed convolutions: 7x7 -> 14x14 -> 28x28.
    for filters in (128, 64):
        x = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(alpha=0.2)(x)

    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model([noise, label], output_img)
# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, label) -> sigmoid score.

    Args:
        img_shape: Shape of one input image, e.g. ``(28, 28, 1)``.
        num_classes: Number of distinct class labels.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[img, label]``.
    """
    layers = tf.keras.layers
    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Embed the label into an image-sized tensor and multiply it into the image.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label)
    label_embedding = layers.Flatten()(embedded)
    label_embedding = layers.Reshape(img_shape)(label_embedding)
    model_input = layers.multiply([img, label_embedding])

    # Two stride-2 convolutions, each followed by LeakyReLU.
    x = model_input
    for filters in (64, 128):
        x = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)

    x = layers.Flatten()(x)
    validity = layers.Dense(1, activation='sigmoid')(x)
    return tf.keras.Model([img, label], validity)
# Build and compile the Conditional GAN
latent_dim = 100
num_classes = 10
img_shape = (28, 28, 1)
generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stacked model ((noise, label) -> generator -> discriminator). The
# discriminator is flagged non-trainable first, so the generator step is meant
# to update only the generator's weights.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # add the channel axis

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop (each "epoch" is a single batch update)
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    # Labels are reshaped to (batch, 1) to match the models' label input and
    # the shape already used when sampling images below.
    real_labels = y_train[idx].reshape(-1, 1)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size).reshape(-1, 1)
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_images = generator.predict([noise, fake_labels], verbose=0)
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], real_targets)
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # [loss, accuracy] averaged

    # Train the generator: ask the stacked model to label fakes as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size).reshape(-1, 1)
    g_loss = cgan.train_on_batch([noise, sampled_labels], real_targets)

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display one sample per class label 0-9
        noise = np.random.normal(0, 1, (10, latent_dim))
        sampled_labels = np.arange(0, 10).reshape(-1, 1)
        generated_images = generator.predict([noise, sampled_labels], verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()
Exercise 5: Evaluate a GAN Using Inception Score and FID
Task: Evaluate the performance of a trained GAN using Inception Score (IS) and Fréchet Inception Distance (FID) on generated images.
Solution:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score (mean and std over splits) of a set of images.

    The score is ``exp(mean_x KL(p(y|x) || p(y)))`` per split, where ``p(y|x)``
    is the class distribution InceptionV3 predicts for image ``x`` and ``p(y)``
    is the mean of those distributions over the split.

    Args:
        images: Batch of RGB images, shape ``(N, H, W, 3)``.
        num_splits: Number of contiguous chunks the predictions are divided into.
        value_range: ``(low, high)`` pixel range of ``images``. Defaults to the
            [-1, 1] range produced by a tanh generator.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is zero

    # The score needs class probabilities, so the classification head must be
    # kept (include_top=True); pooled features are not a distribution.
    model = InceptionV3(include_top=True, weights='imagenet')

    low, high = value_range
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; rescale to that range first.
    images = preprocess_input((images - low) * (255.0 / (high - low)))
    preds = model.predict(images, verbose=0)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: skip rather than average an empty chunk.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (average-pooled features, no
    classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of real RGB images, shape ``(N, H, W, 3)``.
        generated_images: Batch of generated RGB images, shape ``(M, H', W', 3)``.
        value_range: ``(low, high)`` pixel range of both inputs. Defaults to
            the [-1, 1] range produced by a tanh generator.

    Returns:
        The FID value (lower means the two feature distributions are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range
    to_255 = 255.0 / (high - low)

    def _activations(batch):
        """Resize, rescale to [0, 255] for preprocess_input, and embed."""
        batch = tf.image.resize(batch, (299, 299))
        batch = preprocess_input((batch - low) * to_255)
        return model.predict(batch, verbose=0)

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)

    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The covariance product can be singular (e.g. fewer samples than
        # feature dimensions); retry with a small diagonal offset.
        jitter = np.eye(sigma1.shape[0]) * 1e-6
        covmean = sqrtm((sigma1 + jitter).dot(sigma2 + jitter))
    if np.iscomplexobj(covmean):
        # Drop the imaginary part left by numerical error.
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Example: Evaluate a trained GAN on CIFAR-10 dataset
latent_dim = 100
img_shape = (32, 32, 3)

# Load CIFAR-10 and scale the training images to [-1, 1]
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5

# `generator` is assumed to be an already trained GAN generator that accepts
# a batch of latent_dim-sized noise vectors; it is not defined in this snippet.
num_eval_samples = 1000
noise = np.random.normal(0, 1, (num_eval_samples, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated set
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID against an equally sized random subset of real images (no repeats)
real_idx = np.random.choice(x_train.shape[0], num_eval_samples, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
These exercises provide hands-on experience with building, training, and evaluating various types of GANs. By working through these exercises, you will deepen your understanding of GANs and their practical applications in different domains.
3.8 Practical Exercises - Chapter 3: Deep Dive into Generative Adversarial Networks (GANs)
These practical exercises are designed to reinforce the concepts covered in this chapter. By working through these exercises, you will gain hands-on experience with GANs, including their architecture, training, evaluation, and recent innovations.
Exercise 1: Build and Train a Basic GAN
Task: Build and train a basic GAN to generate 28x28 grayscale images similar to the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define the generator model
def build_generator(latent_dim):
    """Build the generator: latent vector -> 28x28x1 image with tanh output.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and fold it into a 7x7 map with 256 channels.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 7x7 -> 14x14 -> 28x28.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Stride-1 output layer: one channel, tanh keeps pixels in [-1, 1].
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net
# Define the discriminator model
def build_discriminator(img_shape):
    """Build the discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(28, 28, 1)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Two stride-2 convolutions halve the spatial size twice.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Flatten the feature map and score it with one sigmoid unit.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net
# Instantiate the GAN
latent_dim = 100
img_shape = (28, 28, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stacked model (noise -> generator -> discriminator). The discriminator is
# flagged non-trainable first, so the generator step is meant to update only
# the generator's weights.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(gan_input, validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # add the channel axis

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# NOTE: each "epoch" below is a single batch update, not a pass over the data.
for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # [loss, accuracy] averaged

    # Train the generator: ask the stacked model to label fakes as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, real_targets)

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()
Exercise 2: Implement and Evaluate a DCGAN
Task: Implement a Deep Convolutional GAN (DCGAN) to generate 64x64 RGB images. Evaluate the model using Inception Score (IS) and Fréchet Inception Distance (FID).
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Build the DCGAN generator: latent vector -> 64x64x3 image with tanh output.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and fold it into an 8x8 map with 256 channels.
    net.add(layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((8, 8, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 8x8 -> 16x16 -> 32x32.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Final stride-2 layer reaches 64x64 with 3 channels; tanh gives [-1, 1].
    net.add(layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return net
# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(64, 64, 3)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stride-2 convolution has no batch normalization.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Two more stride-2 stages, each conv -> batch norm -> LeakyReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net
# Training parameters
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000
batch_size = 64
sample_interval = 1000

# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stacked model (noise -> generator -> discriminator). The discriminator is
# flagged non-trainable first, so the generator step is meant to update only
# the generator's weights.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the dataset (e.g., CIFAR-10)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop (each "epoch" is a single batch update)
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    # CIFAR-10 images are 32x32 but the discriminator expects img_shape
    # (64x64), so upsample each real batch; resizing per batch avoids holding
    # an enlarged copy of the whole dataset in memory.
    real_images = tf.image.resize(x_train[idx], img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # [loss, accuracy] averaged

    # Train the generator: ask the stacked model to label fakes as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, real_targets)

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map tanh output from [-1, 1] back to 8-bit pixel values.
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score (mean and std over splits) of a set of images.

    The score is ``exp(mean_x KL(p(y|x) || p(y)))`` per split, where ``p(y|x)``
    is the class distribution InceptionV3 predicts for image ``x`` and ``p(y)``
    is the mean of those distributions over the split.

    Args:
        images: Batch of RGB images, shape ``(N, H, W, 3)``.
        num_splits: Number of contiguous chunks the predictions are divided into.
        value_range: ``(low, high)`` pixel range of ``images``. Defaults to the
            [-1, 1] range produced by a tanh generator.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is zero

    # The score needs class probabilities, so the classification head must be
    # kept (include_top=True); pooled features are not a distribution.
    model = InceptionV3(include_top=True, weights='imagenet')

    low, high = value_range
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; rescale to that range first.
    images = preprocess_input((images - low) * (255.0 / (high - low)))
    preds = model.predict(images, verbose=0)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: skip rather than average an empty chunk.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (average-pooled features, no
    classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of real RGB images, shape ``(N, H, W, 3)``.
        generated_images: Batch of generated RGB images, shape ``(M, H', W', 3)``.
        value_range: ``(low, high)`` pixel range of both inputs. Defaults to
            the [-1, 1] range produced by a tanh generator.

    Returns:
        The FID value (lower means the two feature distributions are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range
    to_255 = 255.0 / (high - low)

    def _activations(batch):
        """Resize, rescale to [0, 255] for preprocess_input, and embed."""
        batch = tf.image.resize(batch, (299, 299))
        batch = preprocess_input((batch - low) * to_255)
        return model.predict(batch, verbose=0)

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)

    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The covariance product can be singular (e.g. fewer samples than
        # feature dimensions); retry with a small diagonal offset.
        jitter = np.eye(sigma1.shape[0]) * 1e-6
        covmean = sqrtm((sigma1 + jitter).dot(sigma2 + jitter))
    if np.iscomplexobj(covmean):
        # Drop the imaginary part left by numerical error.
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Generate some fake images using the trained GAN generator
# Draw latent vectors and generate the evaluation images
num_eval_samples = 1000
noise = np.random.normal(0, 1, (num_eval_samples, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated set
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID against an equally sized random subset of real images (no repeats)
real_idx = np.random.choice(x_train.shape[0], num_eval_samples, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
Exercise 3: Implement and Train a CycleGAN
Task: Implement and train a CycleGAN to perform image-to-image translation between two domains, such as translating photos to paintings.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator (two downsampling, two upsampling stages).

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image to a 3-channel
        tanh-activated image (the output channel count is fixed at 3).
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    # Encoder: two stride-2 convolutions, each followed by LeakyReLU + batch norm.
    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    # Decoder: one stride-2 transposed convolution, then the tanh output layer.
    features = layers.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(features)
    features = layers.LeakyReLU(alpha=0.2)(features)
    features = layers.BatchNormalization()(features)
    output_img = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)

    return tf.keras.Model(input_img, output_img)
# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a discriminator: image -> single sigmoid "real" probability.

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    # Two stride-2 convolutions, each followed by LeakyReLU.
    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    # Flatten and score with one sigmoid unit.
    flat = layers.Flatten()(features)
    validity = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(input_img, validity)
# Build CycleGAN models
img_shape = (128, 128, 3)

# One generator per translation direction (A -> B and B -> A).
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)

# One discriminator per domain, each compiled for standalone training.
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)
for domain_discriminator in (D_A, D_B):
    domain_discriminator.compile(
        optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
    )
# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute difference between ``y_true`` and ``y_pred``.

    Used as the cycle-consistency loss: ``y_true`` is the original image and
    ``y_pred`` its reconstruction after a round trip through both generators.
    """
    abs_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(abs_error)
# Full CycleGAN model
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Translate each input to the other domain, then back again.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# The discriminators are flagged non-trainable before building the combined
# model, so the generator step is meant to update only the generators.
D_A.trainable = False
D_B.trainable = False
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Outputs: two adversarial scores plus the two cycle reconstructions.
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])

# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading: replace each `...` with an array of
# images. The code below indexes them like NumPy arrays and feeds them to
# models built for img_shape; presumably they should be scaled to [-1, 1] to
# match the generators' tanh output.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Target vectors never change, so build them once outside the loop.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop (each "epoch" is a single batch update)
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]
    # verbose=0: otherwise predict prints a progress bar on every iteration.
    fake_B = G_AB.predict(real_A, verbose=0)
    fake_A = G_BA.predict(real_B, verbose=0)
    dA_loss_real = D_A.train_on_batch(real_A, real_targets)
    dA_loss_fake = D_A.train_on_batch(fake_A, fake_targets)
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
    dB_loss_real = D_B.train_on_batch(real_B, real_targets)
    dB_loss_fake = D_B.train_on_batch(fake_B, fake_targets)
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators: fool both discriminators and reconstruct the inputs
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [real_targets, real_targets, real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display translated images (top row A->B, bottom row B->A)
        fake_B = G_AB.predict(real_A, verbose=0)
        fake_A = G_BA.predict(real_B, verbose=0)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # imshow expects float RGB in [0, 1]; map the tanh output from
            # [-1, 1] so the negative half is not clipped to black.
            axs[0, i].imshow(np.clip((fake_B[i] + 1.0) / 2.0, 0.0, 1.0))
            axs[0, i].axis('off')
            axs[1, i].imshow(np.clip((fake_A[i] + 1.0) / 2.0, 0.0, 1.0))
            axs[1, i].axis('off')
        plt.show()
Exercise 4: Implement a Conditional GAN (cGAN)
Task: Implement a Conditional GAN (cGAN) to generate images conditioned on class labels from the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, label) -> image.

    The integer label is embedded into a ``latent_dim`` vector and multiplied
    element-wise with the noise. The result is projected to a 7x7x256 map and
    upsampled by two stride-2 transposed convolutions; the tanh output has
    ``img_shape[-1]`` channels.
    """
    L = tf.keras.layers
    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # Flatten drops the length-1 sequence axis the embedding adds.
    label_embedding = L.Flatten()(L.Embedding(num_classes, latent_dim)(label))
    model_input = L.Multiply()([noise, label_embedding])

    x = L.Dense(256 * 7 * 7, activation="relu")(model_input)
    x = L.Reshape((7, 7, 256))(x)
    x = L.BatchNormalization()(x)
    # Two upsampling stages: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        x = L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.BatchNormalization()(x)
        x = L.LeakyReLU(alpha=0.2)(x)
    output_img = L.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model([noise, label], output_img)
# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, label) -> sigmoid validity.

    The label is embedded into ``prod(img_shape)`` values, reshaped to the
    image shape and multiplied element-wise with the image before two
    stride-2 convolutions and a single-unit sigmoid output.
    """
    L = tf.keras.layers
    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # One embedding value per pixel so the label can gate the image.
    label_embedding = L.Flatten()(L.Embedding(num_classes, np.prod(img_shape))(label))
    label_embedding = L.Reshape(img_shape)(label_embedding)
    x = L.Multiply()([img, label_embedding])
    for filters in (64, 128):
        x = L.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.LeakyReLU(alpha=0.2)(x)
    validity = L.Dense(1, activation='sigmoid')(L.Flatten()(x))
    return tf.keras.Model([img, label], validity)
# Build and compile the Conditional GAN
latent_dim, num_classes, img_shape = 100, 10, (28, 28, 1)
generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the discriminator is compiled before being frozen, presumably
# so its own train_on_batch keeps updating it while the stacked model trains
# only the generator — confirm this holds for the Keras version in use.
discriminator.trainable = False
# Stacked model: (noise, label) -> generator -> discriminator(image, label)
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(loss='binary_crossentropy', optimizer='adam')
# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = x_train[..., np.newaxis]  # add the trailing channel axis
# Training parameters
epochs, batch_size, sample_interval = 10000, 64, 1000
# Training loop
# Targets never change, so build them once: 1 = real, 0 = generated.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))
for epoch in range(epochs):
    # Train the discriminator on a real batch and a generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images, real_labels = x_train[idx], y_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size)
    fake_images = generator.predict([noise, fake_labels])
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], real_targets)
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    # Train the generator: fresh noise/labels, targets set to "real"
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    g_loss = cgan.train_on_batch([noise, sampled_labels], real_targets)
    if epoch % sample_interval != 0:
        continue
    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")
    # Show one generated sample for each label 0..9
    noise = np.random.normal(0, 1, (10, latent_dim))
    sampled_labels = np.arange(0, 10).reshape(-1, 1)
    generated_images = generator.predict([noise, sampled_labels])
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for i in range(len(generated_images)):
        img = generated_images[i]
        axs[i].imshow(img.squeeze(), cmap='gray')
        axs[i].axis('off')
    plt.show()
Exercise 5: Evaluate a GAN Using Inception Score and FID
Task: Evaluate the performance of a trained GAN using Inception Score (IS) and Fréchet Inception Distance (FID) on generated images.
Solution:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score of a batch of images.

    Args:
        images: RGB image batch scaled to [-1, 1], the range produced by the
            tanh generators and the dataset normalisation used alongside this
            function. Images are resized to 299x299.
        num_splits: number of contiguous groups the predictions are divided
            into; one score is computed per group.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    # The score is defined on class probabilities p(y|x), so the softmax
    # classification head must be kept; the pooled features returned with
    # include_top=False are not a probability distribution.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
    # normalisation first so the images are not rescaled twice.
    images = preprocess_input((images + 1.0) * 127.5)
    preds = model.predict(images)
    eps = 1e-16  # keeps log() finite if a probability underflows to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        py = np.mean(part, axis=0, keepdims=True)  # marginal p(y) within the split
        # KL(p(y|x) || p(y)) per image, then exp of the split average.
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Fréchet Inception Distance between two image batches.

    Args:
        real_images: RGB image batch scaled to [-1, 1].
        generated_images: RGB image batch scaled to [-1, 1].

    Returns:
        The FID: squared distance between the mean InceptionV3 pooled
        activations plus ``trace(S1 + S2 - 2 * sqrt(S1 @ S2))`` of their
        covariances.
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(images):
        # Pooled InceptionV3 features for one batch.
        images = tf.image.resize(images, (299, 299))
        # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
        # normalisation first so the images are not rescaled twice.
        return model.predict(preprocess_input((images + 1.0) * 127.5))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return a complex result; only the real part is used.
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Example: Evaluate a trained GAN on CIFAR-10 dataset
latent_dim, img_shape = 100, (32, 32, 3)
# Load CIFAR-10 dataset (training images only)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# `generator` is not defined in this snippet: it is assumed to be an
# already-trained GAN generator that accepts latent_dim-sized noise.
noise = np.random.normal(0, 1, (1000, latent_dim))
generated_images = generator.predict(noise)
# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")
# Calculate FID Score against 1000 distinct real images
real_images = x_train[
    np.random.choice(x_train.shape[0], 1000, replace=False)
]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
These exercises provide hands-on experience with building, training, and evaluating various types of GANs. By working through these exercises, you will deepen your understanding of GANs and their practical applications in different domains.
3.8 Practical Exercises - Chapter 3: Deep Dive into Generative Adversarial Networks (GANs)
These practical exercises are designed to reinforce the concepts covered in this chapter. By working through these exercises, you will gain hands-on experience with GANs, including their architecture, training, evaluation, and recent innovations.
Exercise 1: Build and Train a Basic GAN
Task: Build and train a basic GAN to generate 28x28 grayscale images similar to the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define the generator model
def build_generator(latent_dim):
    """Assemble the generator: latent vector -> 28x28x1 image in [-1, 1].

    A dense projection is reshaped to a 7x7x256 feature map, two stride-2
    transposed convolutions upsample it to 14x14 and then 28x28, and a final
    tanh transposed convolution emits a single channel.
    """
    L = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(L.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    model.add(L.Reshape((7, 7, 256)))
    model.add(L.BatchNormalization())
    # Each upsampling stage: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        model.add(L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.2))
    model.add(L.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return model
# Define the discriminator model
def build_discriminator(img_shape):
    """Assemble the discriminator: image of ``img_shape`` -> sigmoid validity.

    Two stride-2 convolutions with LeakyReLU activations are followed by a
    flatten and a single-unit sigmoid layer.
    """
    L = tf.keras.layers
    stack = [
        L.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape),
        L.LeakyReLU(alpha=0.2),
    ]
    stack += [
        L.Conv2D(128, kernel_size=4, strides=2, padding='same'),
        L.LeakyReLU(alpha=0.2),
    ]
    stack += [L.Flatten(), L.Dense(1, activation='sigmoid')]
    return tf.keras.Sequential(stack)
# Instantiate the GAN
latent_dim, img_shape = 100, (28, 28, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the discriminator is compiled before being frozen, presumably
# so its own train_on_batch keeps updating it while the stacked model trains
# only the generator — confirm this holds for the Keras version in use.
discriminator.trainable = False
# Stacked model: noise -> generator -> discriminator
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(gan_input, validity)
gan.compile(loss='binary_crossentropy', optimizer='adam')
# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = x_train[..., np.newaxis]  # add the trailing channel axis
# Training parameters
epochs, batch_size, sample_interval = 10000, 64, 1000
# Targets never change, so build them once: 1 = real, 0 = generated.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))
for epoch in range(epochs):
    # Train the discriminator on a real batch and a generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    # Train the generator: fresh noise, targets set to "real"
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, real_targets)
    if epoch % sample_interval != 0:
        continue
    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")
    # Show ten freshly generated samples
    noise = np.random.normal(0, 1, (10, latent_dim))
    generated_images = generator.predict(noise)
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for i in range(len(generated_images)):
        img = generated_images[i]
        axs[i].imshow(img.squeeze(), cmap='gray')
        axs[i].axis('off')
    plt.show()
Exercise 2: Implement and Evaluate a DCGAN
Task: Implement a Deep Convolutional GAN (DCGAN) to generate 64x64 RGB images. Evaluate the model using Inception Score (IS) and Fréchet Inception Distance (FID).
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Assemble the DCGAN generator: latent vector -> 64x64x3 image in [-1, 1].

    A dense projection is reshaped to 8x8x256 and upsampled by three stride-2
    transposed convolutions (8 -> 16 -> 32 -> 64); the last one applies tanh
    and emits three channels.
    """
    L = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(L.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    model.add(L.Reshape((8, 8, 256)))
    model.add(L.BatchNormalization())
    # Intermediate upsampling stages: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        model.add(L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.2))
    model.add(L.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return model
# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Assemble the DCGAN discriminator: image of ``img_shape`` -> sigmoid validity.

    Three stride-2 convolutions (64, 128, 256 filters) with LeakyReLU; the
    second and third are batch-normalised before activation. The result is
    flattened into a single-unit sigmoid layer.
    """
    L = tf.keras.layers
    model = tf.keras.Sequential()
    # The first convolution has no batch normalisation.
    model.add(L.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    model.add(L.LeakyReLU(alpha=0.2))
    for filters in (128, 256):
        model.add(L.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.2))
    model.add(L.Flatten())
    model.add(L.Dense(1, activation='sigmoid'))
    return model
# Training parameters
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000
batch_size = 64
sample_interval = 1000
# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# NOTE(review): the discriminator is compiled before being frozen, presumably
# so its own train_on_batch keeps updating it while the stacked model trains
# only the generator — confirm this holds for the Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')
# Load and preprocess the dataset (e.g., CIFAR-10)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# CIFAR-10 images are 32x32, but the generator emits and the discriminator
# expects img_shape (64x64). Upsample the real images so both kinds of batch
# match the discriminator input; done in chunks to bound each resize call.
resize_chunk = 1000
x_train = np.concatenate([
    tf.image.resize(x_train[start:start + resize_chunk], img_shape[:2]).numpy()
    for start in range(0, x_train.shape[0], resize_chunk)
])
# Training loop
# Targets never change, so build them once: 1 = real, 0 = generated.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))
for epoch in range(epochs):
    # Train the discriminator on a real batch and a generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    # Train the generator: fresh noise, targets set to "real"
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, real_targets)
    if epoch % sample_interval != 0:
        continue
    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")
    # Show ten freshly generated samples
    noise = np.random.normal(0, 1, (10, latent_dim))
    generated_images = generator.predict(noise)
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for i in range(len(generated_images)):
        img = generated_images[i]
        # Map tanh output from [-1, 1] back to 0..255 bytes for display.
        axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
        axs[i].axis('off')
    plt.show()
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score of a batch of images.

    Args:
        images: RGB image batch scaled to [-1, 1], the range produced by the
            tanh generator and the dataset normalisation used in this
            exercise. Images are resized to 299x299.
        num_splits: number of contiguous groups the predictions are divided
            into; one score is computed per group.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    # The score is defined on class probabilities p(y|x), so the softmax
    # classification head must be kept; the pooled features returned with
    # include_top=False are not a probability distribution.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
    # normalisation first so the images are not rescaled twice.
    images = preprocess_input((images + 1.0) * 127.5)
    preds = model.predict(images)
    eps = 1e-16  # keeps log() finite if a probability underflows to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        py = np.mean(part, axis=0, keepdims=True)  # marginal p(y) within the split
        # KL(p(y|x) || p(y)) per image, then exp of the split average.
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Fréchet Inception Distance between two image batches.

    Args:
        real_images: RGB image batch scaled to [-1, 1].
        generated_images: RGB image batch scaled to [-1, 1].

    Returns:
        The FID: squared distance between the mean InceptionV3 pooled
        activations plus ``trace(S1 + S2 - 2 * sqrt(S1 @ S2))`` of their
        covariances.
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(images):
        # Pooled InceptionV3 features for one batch.
        images = tf.image.resize(images, (299, 299))
        # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
        # normalisation first so the images are not rescaled twice.
        return model.predict(preprocess_input((images + 1.0) * 127.5))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return a complex result; only the real part is used.
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))
generated_images = generator.predict(noise)
# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")
# Calculate FID Score against 1000 distinct real images
real_images = x_train[
    np.random.choice(x_train.shape[0], 1000, replace=False)
]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
Exercise 3: Implement and Train a CycleGAN
Task: Implement and train a CycleGAN to perform image-to-image translation between two domains, such as translating photos to paintings.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator.

    Two stride-2 convolutions downsample the input, then two stride-2
    transposed convolutions upsample it back; the last one applies tanh and
    emits three channels. Hidden stages apply LeakyReLU before batch
    normalisation.
    """
    L = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)
    x = input_img
    # Encoder: 64 then 128 filters.
    for filters in (64, 128):
        x = L.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.LeakyReLU(alpha=0.2)(x)
        x = L.BatchNormalization()(x)
    # Decoder: one hidden upsampling stage, then the tanh output stage.
    x = L.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(x)
    x = L.LeakyReLU(alpha=0.2)(x)
    x = L.BatchNormalization()(x)
    output_img = L.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model(input_img, output_img)
# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a discriminator: image of ``img_shape`` -> sigmoid validity.

    Two stride-2 convolutions with LeakyReLU are flattened into a single-unit
    sigmoid layer.
    """
    L = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)
    x = input_img
    for filters in (64, 128):
        x = L.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.LeakyReLU(alpha=0.2)(x)
    validity = L.Dense(1, activation='sigmoid')(L.Flatten()(x))
    return tf.keras.Model(input_img, validity)
# Build CycleGAN models: one generator per translation direction and one
# discriminator per domain.
img_shape = (128, 128, 3)
G_AB, G_BA = build_cyclegan_generator(img_shape), build_cyclegan_generator(img_shape)
D_A, D_B = build_cyclegan_discriminator(img_shape), build_cyclegan_discriminator(img_shape)
# Both discriminators are compiled identically.
D_A.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
D_B.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute difference between target and prediction."""
    abs_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(abs_error)
# Full CycleGAN model
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)
# A -> B -> A cycle
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
# B -> A -> B cycle
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)
# NOTE(review): the discriminators were compiled before being frozen,
# presumably so their own train_on_batch keeps updating them while the
# stacked model trains only the generators — confirm for the Keras version
# in use.
D_A.trainable = False
D_B.trainable = False
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)
# Two adversarial outputs followed by the two cycle reconstructions,
# matched one-to-one with the four losses below.
cycle_gan = tf.keras.Model(
    inputs=[img_A, img_B],
    outputs=[valid_A, valid_B, reconstr_A, reconstr_B],
)
cycle_gan.compile(
    loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss],
    optimizer='adam',
)
# Summary of the models
for summarized in (G_AB, G_BA, D_A, D_B, cycle_gan):
    summarized.summary()
# Training parameters
epochs, batch_size, sample_interval = 10000, 64, 1000
# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholders only: replace each Ellipsis with an image array before training.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images
# Training loop: each iteration updates both discriminators on one real and
# one translated batch, then updates both generators through cycle_gan.
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]
    fake_B = G_AB.predict(real_A)
    fake_A = G_BA.predict(real_B)
    dA_loss_real = D_A.train_on_batch(real_A, np.ones((batch_size, 1)))
    dA_loss_fake = D_A.train_on_batch(fake_A, np.zeros((batch_size, 1)))
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
    dB_loss_real = D_B.train_on_batch(real_B, np.ones((batch_size, 1)))
    dB_loss_fake = D_B.train_on_batch(fake_B, np.zeros((batch_size, 1)))
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)
    # Train the generators: adversarial targets are "real" (ones) and the
    # cycle targets are the original images themselves.
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [np.ones((batch_size, 1)), np.ones((batch_size, 1)), real_A, real_B])
    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")
        # Generate and display translated images
        fake_B = G_AB.predict(real_A)
        fake_A = G_BA.predict(real_B)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # The generators end in tanh, so pixels lie in [-1, 1]; imshow
            # expects float RGB in [0, 1] and would clip the negative half.
            axs[0, i].imshow((fake_B[i] + 1.0) / 2.0)
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] + 1.0) / 2.0)
            axs[1, i].axis('off')
        plt.show()
Exercise 4: Implement a Conditional GAN (cGAN)
Task: Implement a Conditional GAN (cGAN) to generate images conditioned on class labels from the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, label) -> image.

    The integer label is embedded into a ``latent_dim`` vector and multiplied
    element-wise with the noise. The result is projected to a 7x7x256 map and
    upsampled by two stride-2 transposed convolutions; the tanh output has
    ``img_shape[-1]`` channels.
    """
    L = tf.keras.layers
    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # Flatten drops the length-1 sequence axis the embedding adds.
    label_embedding = L.Flatten()(L.Embedding(num_classes, latent_dim)(label))
    model_input = L.Multiply()([noise, label_embedding])

    x = L.Dense(256 * 7 * 7, activation="relu")(model_input)
    x = L.Reshape((7, 7, 256))(x)
    x = L.BatchNormalization()(x)
    # Two upsampling stages: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        x = L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.BatchNormalization()(x)
        x = L.LeakyReLU(alpha=0.2)(x)
    output_img = L.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model([noise, label], output_img)
# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, label) -> sigmoid validity.

    The label is embedded into ``prod(img_shape)`` values, reshaped to the
    image shape and multiplied element-wise with the image before two
    stride-2 convolutions and a single-unit sigmoid output.
    """
    L = tf.keras.layers
    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # One embedding value per pixel so the label can gate the image.
    label_embedding = L.Flatten()(L.Embedding(num_classes, np.prod(img_shape))(label))
    label_embedding = L.Reshape(img_shape)(label_embedding)
    x = L.Multiply()([img, label_embedding])
    for filters in (64, 128):
        x = L.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = L.LeakyReLU(alpha=0.2)(x)
    validity = L.Dense(1, activation='sigmoid')(L.Flatten()(x))
    return tf.keras.Model([img, label], validity)
# Build and compile the Conditional GAN
latent_dim, num_classes, img_shape = 100, 10, (28, 28, 1)
generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the discriminator is compiled before being frozen, presumably
# so its own train_on_batch keeps updating it while the stacked model trains
# only the generator — confirm this holds for the Keras version in use.
discriminator.trainable = False
# Stacked model: (noise, label) -> generator -> discriminator(image, label)
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(loss='binary_crossentropy', optimizer='adam')
# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = x_train[..., np.newaxis]  # add the trailing channel axis
# Training parameters
epochs, batch_size, sample_interval = 10000, 64, 1000
# Training loop
# Targets never change, so build them once: 1 = real, 0 = generated.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))
for epoch in range(epochs):
    # Train the discriminator on a real batch and a generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images, real_labels = x_train[idx], y_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size)
    fake_images = generator.predict([noise, fake_labels])
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], real_targets)
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    # Train the generator: fresh noise/labels, targets set to "real"
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    g_loss = cgan.train_on_batch([noise, sampled_labels], real_targets)
    if epoch % sample_interval != 0:
        continue
    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")
    # Show one generated sample for each label 0..9
    noise = np.random.normal(0, 1, (10, latent_dim))
    sampled_labels = np.arange(0, 10).reshape(-1, 1)
    generated_images = generator.predict([noise, sampled_labels])
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for i in range(len(generated_images)):
        img = generated_images[i]
        axs[i].imshow(img.squeeze(), cmap='gray')
        axs[i].axis('off')
    plt.show()
Exercise 5: Evaluate a GAN Using Inception Score and FID
Task: Evaluate the performance of a trained GAN using Inception Score (IS) and Fréchet Inception Distance (FID) on generated images.
Solution:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score of a batch of images.

    Args:
        images: RGB image batch scaled to [-1, 1], the range produced by the
            tanh generators and the dataset normalisation used alongside this
            function. Images are resized to 299x299.
        num_splits: number of contiguous groups the predictions are divided
            into; one score is computed per group.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    # The score is defined on class probabilities p(y|x), so the softmax
    # classification head must be kept; the pooled features returned with
    # include_top=False are not a probability distribution.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
    # normalisation first so the images are not rescaled twice.
    images = preprocess_input((images + 1.0) * 127.5)
    preds = model.predict(images)
    eps = 1e-16  # keeps log() finite if a probability underflows to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        py = np.mean(part, axis=0, keepdims=True)  # marginal p(y) within the split
        # KL(p(y|x) || p(y)) per image, then exp of the split average.
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Fréchet Inception Distance between two image batches.

    Args:
        real_images: RGB image batch scaled to [-1, 1].
        generated_images: RGB image batch scaled to [-1, 1].

    Returns:
        The FID: squared distance between the mean InceptionV3 pooled
        activations plus ``trace(S1 + S2 - 2 * sqrt(S1 @ S2))`` of their
        covariances.
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(images):
        # Pooled InceptionV3 features for one batch.
        images = tf.image.resize(images, (299, 299))
        # preprocess_input expects pixel values in [0, 255]; undo the [-1, 1]
        # normalisation first so the images are not rescaled twice.
        return model.predict(preprocess_input((images + 1.0) * 127.5))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return a complex result; only the real part is used.
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Example: Evaluate a trained GAN on CIFAR-10 dataset
latent_dim, img_shape = 100, (32, 32, 3)
# Load CIFAR-10 dataset (training images only)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# `generator` is not defined in this snippet: it is assumed to be an
# already-trained GAN generator that accepts latent_dim-sized noise.
noise = np.random.normal(0, 1, (1000, latent_dim))
generated_images = generator.predict(noise)
# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")
# Calculate FID Score against 1000 distinct real images
real_images = x_train[
    np.random.choice(x_train.shape[0], 1000, replace=False)
]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
These exercises provide hands-on experience with building, training, and evaluating various types of GANs. By working through these exercises, you will deepen your understanding of GANs and their practical applications in different domains.
3.8 Practical Exercises - Chapter 3: Deep Dive into Generative Adversarial Networks (GANs)
These practical exercises are designed to reinforce the concepts covered in this chapter. By working through these exercises, you will gain hands-on experience with GANs, including their architecture, training, evaluation, and recent innovations.
Exercise 1: Build and Train a Basic GAN
Task: Build and train a basic GAN to generate 28x28 grayscale images similar to the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define the generator model
def build_generator(latent_dim):
    """Assemble the generator: latent vector -> 28x28x1 image in [-1, 1].

    A dense projection is reshaped to a 7x7x256 feature map, two stride-2
    transposed convolutions upsample it to 14x14 and then 28x28, and a final
    tanh transposed convolution emits a single channel.
    """
    L = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(L.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    model.add(L.Reshape((7, 7, 256)))
    model.add(L.BatchNormalization())
    # Each upsampling stage: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        model.add(L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.2))
    model.add(L.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return model
# Define the discriminator model
def build_discriminator(img_shape):
    """Assemble the discriminator: image of ``img_shape`` -> sigmoid validity.

    Two stride-2 convolutions with LeakyReLU activations are followed by a
    flatten and a single-unit sigmoid layer.
    """
    L = tf.keras.layers
    stack = [
        L.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape),
        L.LeakyReLU(alpha=0.2),
    ]
    stack += [
        L.Conv2D(128, kernel_size=4, strides=2, padding='same'),
        L.LeakyReLU(alpha=0.2),
    ]
    stack += [L.Flatten(), L.Dense(1, activation='sigmoid')]
    return tf.keras.Sequential(stack)
# Instantiate the GAN
latent_dim, img_shape = 100, (28, 28, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the discriminator is compiled before being frozen, presumably
# so its own train_on_batch keeps updating it while the stacked model trains
# only the generator — confirm this holds for the Keras version in use.
discriminator.trainable = False
# Stacked model: noise -> generator -> discriminator
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(gan_input, validity)
gan.compile(loss='binary_crossentropy', optimizer='adam')
# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = x_train[..., np.newaxis]  # add the trailing channel axis
# Training parameters
epochs, batch_size, sample_interval = 10000, 64, 1000
# Targets never change, so build them once: 1 = real, 0 = generated.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))
for epoch in range(epochs):
    # Train the discriminator on a real batch and a generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    # Train the generator: fresh noise, targets set to "real"
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, real_targets)
    if epoch % sample_interval != 0:
        continue
    # Print progress
    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")
    # Show ten freshly generated samples
    noise = np.random.normal(0, 1, (10, latent_dim))
    generated_images = generator.predict(noise)
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for i in range(len(generated_images)):
        img = generated_images[i]
        axs[i].imshow(img.squeeze(), cmap='gray')
        axs[i].axis('off')
    plt.show()
Exercise 2: Implement and Evaluate a DCGAN
Task: Implement a Deep Convolutional GAN (DCGAN) to generate 64x64 RGB images. Evaluate the model using Inception Score (IS) and Fréchet Inception Distance (FID).
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Assemble the DCGAN generator: latent vector -> 64x64x3 image in [-1, 1].

    A dense projection is reshaped to 8x8x256 and upsampled by three stride-2
    transposed convolutions (8 -> 16 -> 32 -> 64); the last one applies tanh
    and emits three channels.
    """
    L = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(L.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    model.add(L.Reshape((8, 8, 256)))
    model.add(L.BatchNormalization())
    # Intermediate upsampling stages: transposed conv -> batch norm -> LeakyReLU.
    for filters in (128, 64):
        model.add(L.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        model.add(L.BatchNormalization())
        model.add(L.LeakyReLU(alpha=0.2))
    model.add(L.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return model
# DCGAN discriminator: image -> probability that the image is real
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator as a Keras ``Sequential`` model.

    Three stride-2 convolutions (64, 128, 256 filters) with LeakyReLU
    activations, batch normalization after the second and third, followed by
    a flatten and a single sigmoid unit.

    Args:
        img_shape: Shape of one input image, e.g. ``(64, 64, 3)``.

    Returns:
        The uncompiled discriminator model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()
    # The first convolution carries the input shape and has no batch norm.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net
# Training hyperparameters
latent_dim = 100  # length of the noise vector fed to the generator
img_shape = (64, 64, 3)  # image shape the discriminator is built for
epochs = 10000  # iterations of the training loop below (one batch per iteration)
batch_size = 64
sample_interval = 1000  # progress is reported every this many iterations
# Instantiate the DCGAN: a generator, a discriminator, and a combined model
# (noise -> generator -> discriminator) that is used to train the generator.
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# The discriminator is compiled on its own first; the training loop trains it
# directly through discriminator.train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Freeze the discriminator before wiring it into the combined model so that
# training `dcgan` is meant to update only the generator.
# NOTE(review): keeping the compile call above this line relies on Keras
# capturing `trainable` at compile time, so the standalone discriminator
# stays trainable. Confirm this holds for the Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
# Combined model: maps noise to the discriminator's sigmoid score for the
# image generated from that noise.
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')
# Load and preprocess the dataset (e.g., CIFAR-10)
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Target labels are the same for every step, so build them once.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop. Each "epoch" here is one training step on a single random
# batch, not a full pass over the dataset.
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    # CIFAR-10 images are 32x32, but the discriminator is built for img_shape
    # and the generator emits images of that size, so the real batch is
    # resized to match. Resizing per batch avoids keeping an enlarged copy of
    # the whole dataset in memory.
    real_images = tf.image.resize(x_train[idx], img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0 keeps predict from printing a progress bar on every step.
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_images, fake_targets)
    # Element-wise average of the [loss, accuracy] pairs from both batches.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator: it is rewarded when the (frozen) discriminator
    # labels its images as real.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, real_targets)

    # Print progress and show samples every `sample_interval` steps
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display images
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map tanh outputs from [-1, 1] back to displayable uint8 pixels.
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0), eps=1e-16):
    """Compute the Inception Score (IS) of a set of images.

    The images are resized to 299x299 and classified by an
    ImageNet-pretrained InceptionV3. For each of ``num_splits`` consecutive
    slices of the predictions the score is ``exp(mean_x KL(p(y|x) || p(y)))``,
    where ``p(y)`` is the mean prediction of that slice.

    Args:
        images: Batch of RGB images, shape (N, H, W, 3), with pixel values in
            ``value_range``.
        num_splits: Number of consecutive slices the predictions are divided
            into; slices that end up empty are skipped.
        value_range: ``(low, high)`` pixel range of ``images``. The default
            (-1, 1) matches the tanh generator outputs and the normalized
            training data used in this chapter.
        eps: Small constant added inside the logarithms so that zero
            probabilities do not produce NaN.

    Returns:
        Tuple ``(mean, std)`` of the per-slice scores.
    """
    # IS is defined on class probabilities p(y|x), so the softmax
    # classification head must be kept; pooled features are not a
    # probability distribution.
    model = InceptionV3(include_top=True, weights='imagenet')
    low, high = value_range
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; map the caller's range
    # there first, otherwise [-1, 1] inputs collapse to a near-constant image.
    images = (images - low) * (255.0 / (high - low))
    images = preprocess_input(images)
    preds = np.asarray(model.predict(images), dtype=np.float64)
    total = len(preds)
    scores = []
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Happens when there are fewer images than splits.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        # Per-image KL divergence between p(y|x) and the marginal p(y).
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Frechet Inception Distance (FID) between two image sets.

    Both sets are resized to 299x299 and passed through an
    ImageNet-pretrained InceptionV3 without its classification head
    (global-average-pooled features). The distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` where ``mu`` and
    ``S`` are the feature mean and covariance of each set.

    Args:
        real_images: Batch of real RGB images, shape (N, H, W, 3).
        generated_images: Batch of generated RGB images, shape (M, H', W', 3).
        value_range: ``(low, high)`` pixel range shared by both inputs. The
            default (-1, 1) matches the tanh generator outputs and the
            normalized training data used in this chapter.

    Returns:
        The FID as a float; lower means the two feature distributions are
        closer.
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range

    def _activations(images):
        # Resize, map pixels to the [0, 255] range preprocess_input expects,
        # then extract the pooled Inception features.
        images = tf.image.resize(images, (299, 299))
        images = (images - low) * (255.0 / (high - low))
        return model.predict(preprocess_input(images))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return small imaginary parts from numerical error; drop them.
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Draw 1000 latent vectors and turn them into images with the trained generator
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_images = generator.predict(noise)
# Inception Score of the generated sample (mean and std over the splits)
inception_stats = calculate_inception_score(generated_images)
is_mean, is_std = inception_stats
print(f"Inception Score: {is_mean} ± {is_std}")
# FID between 1000 distinct random training images and the generated sample
real_idx = np.random.choice(x_train.shape[0], 1000, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
Exercise 3: Implement and Train a CycleGAN
Task: Implement and train a CycleGAN to perform image-to-image translation between two domains, such as translating photos to paintings.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator for one CycleGAN translation direction.

    The network downsamples twice with stride-2 convolutions (64 and 128
    filters) and upsamples twice with stride-2 transposed convolutions, each
    stage followed by LeakyReLU and batch normalization, ending in a ``tanh``
    layer so outputs lie in [-1, 1].

    Args:
        img_shape: Shape of one input image, ``(height, width, channels)``.
            The output has the same channel count as the input. Height and
            width should be divisible by 4 so that the two down/up-sampling
            stages reproduce the input size.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image to a translated image.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)
    x = input_img
    # Encoder: two stride-2 convolutions.
    for filters in (64, 128):
        x = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
        x = layers.BatchNormalization()(x)
    # Decoder: two stride-2 transposed convolutions.
    x = layers.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.BatchNormalization()(x)
    # Take the channel count from img_shape instead of hard-coding 3, so the
    # output matches the input for the cycle-consistency loss.
    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=2, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model(input_img, output_img)
# CycleGAN discriminator: image -> probability that the image is real
def build_cyclegan_discriminator(img_shape):
    """Build a discriminator for one CycleGAN image domain.

    Two stride-2 convolutions (64 and 128 filters) with LeakyReLU
    activations, then a flatten and a single sigmoid unit.

    Args:
        img_shape: Shape of one input image.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image to a validity score.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)
    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
    flat = layers.Flatten()(features)
    validity = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(input_img, validity)
# Build CycleGAN models: one generator per translation direction and one
# discriminator per image domain.
img_shape = (128, 128, 3)
G_AB = build_cyclegan_generator(img_shape)  # translates domain A -> domain B
G_BA = build_cyclegan_generator(img_shape)  # translates domain B -> domain A
D_A = build_cyclegan_discriminator(img_shape)  # scores images as real/fake in domain A
D_B = build_cyclegan_discriminator(img_shape)  # scores images as real/fake in domain B
# The discriminators are compiled on their own here; the training loop trains
# them directly through train_on_batch.
D_A.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
D_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Cycle-consistency loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute (L1) difference between two tensors.

    The mean is taken over all elements, giving a single scalar.

    Args:
        y_true: Reference images.
        y_pred: Reconstructed images.
    """
    abs_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(abs_error)
# Full CycleGAN model: wires both generators and both discriminators into one
# graph that is used to train the generators.
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)
# Forward cycle: A -> B -> A
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
# Backward cycle: B -> A -> B
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)
# Freeze the discriminators so that training the combined model is meant to
# update only the generators.
# NOTE(review): this relies on Keras capturing `trainable` at compile time, so
# the discriminators compiled earlier stay trainable on their own. Confirm
# this holds for the Keras version in use.
D_A.trainable = False
D_B.trainable = False
# Each discriminator scores the translated image that lands in its domain.
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)
# Outputs, in order: the two discriminator scores, then the two reconstructions.
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
# Losses are matched to the outputs by position: binary cross-entropy for the
# two validity scores and cycle_loss for the two reconstructions.
# NOTE(review): no loss_weights are passed, so no explicit weighting is applied
# to the cycle terms. Consider whether they should be up-weighted.
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])
# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()
# Training parameters
epochs = 10000  # iterations of the training loop below (one batch per iteration)
batch_size = 64
sample_interval = 1000  # progress is reported every this many iterations
# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading: `...` is Python's Ellipsis object, so
# both names must be replaced with real data before the training loop runs.
# The loop reads `.shape[0]` and indexes with an integer array, so array-like
# image batches are required.
# NOTE(review): presumably each should have shape (N,) + img_shape and be
# scaled to [-1, 1] to match the generators' tanh output. Confirm when
# wiring in a dataset.
domain_A = ... # Load your domain A images
domain_B = ... # Load your domain B images
# Target labels are the same for every step, so build them once.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop. Each "epoch" here is one training step on a single random
# batch from each domain, not a full pass over the data.
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]
    # verbose=0 keeps predict from printing a progress bar on every step.
    fake_B = G_AB.predict(real_A, verbose=0)
    fake_A = G_BA.predict(real_B, verbose=0)
    dA_loss_real = D_A.train_on_batch(real_A, real_targets)
    dA_loss_fake = D_A.train_on_batch(fake_A, fake_targets)
    # Element-wise average of the [loss, accuracy] pairs from both batches.
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
    dB_loss_real = D_B.train_on_batch(real_B, real_targets)
    dB_loss_fake = D_B.train_on_batch(fake_B, fake_targets)
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators: both translated images should be judged real and
    # both reconstructions should match the original inputs.
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [real_targets, real_targets, real_A, real_B])

    # Print progress and show samples every `sample_interval` steps
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display translated images
        fake_B = G_AB.predict(real_A, verbose=0)
        fake_A = G_BA.predict(real_B, verbose=0)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # Generator outputs are tanh-scaled to [-1, 1]; convert them to
            # uint8 pixels so imshow does not clip the negative half.
            axs[0, i].imshow((fake_B[i] * 127.5 + 127.5).astype(np.uint8))
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] * 127.5 + 127.5).astype(np.uint8))
            axs[1, i].axis('off')
        plt.show()
Exercise 4: Implement a Conditional GAN (cGAN)
Task: Implement a Conditional GAN (cGAN) to generate images conditioned on class labels from the MNIST dataset.
Solution:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Conditional GAN generator: (noise, class label) -> image
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the label-conditioned generator.

    The integer label is embedded into a vector of length ``latent_dim`` and
    multiplied element-wise with the noise vector. The product is projected
    to a 7x7x256 feature map and upsampled by two stride-2 transposed
    convolutions (7 -> 14 -> 28), ending in a stride-1 ``tanh`` layer.

    Args:
        latent_dim: Length of the input noise vector.
        num_classes: Number of distinct class labels.
        img_shape: Target image shape; only its last entry (the channel
            count) is used here.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[noise, label]``.
    """
    layers = tf.keras.layers
    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # Condition the noise on the label through an element-wise product.
    embedded = layers.Embedding(num_classes, latent_dim)(label)
    label_embedding = layers.Flatten()(embedded)
    x = layers.multiply([noise, label_embedding])
    x = layers.Dense(256 * 7 * 7, activation="relu")(x)
    x = layers.Reshape((7, 7, 256))(x)
    x = layers.BatchNormalization()(x)
    # Two identical upsampling stages that differ only in filter count.
    for filters in (128, 64):
        x = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(x)
    return tf.keras.Model([noise, label], output_img)
# Conditional GAN discriminator: (image, class label) -> probability of "real"
def build_cgan_discriminator(img_shape, num_classes):
    """Build the label-conditioned discriminator.

    The integer label is embedded into a vector with one entry per image
    element, reshaped to ``img_shape`` and multiplied element-wise with the
    image. The result passes through two stride-2 convolutions with
    LeakyReLU, a flatten and a single sigmoid unit.

    Args:
        img_shape: Shape of one input image.
        num_classes: Number of distinct class labels.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[img, label]``.
    """
    layers = tf.keras.layers
    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')
    # Turn the label into an image-shaped mask.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label)
    label_embedding = layers.Reshape(img_shape)(layers.Flatten()(embedded))
    x = layers.multiply([img, label_embedding])
    for filters in (64, 128):
        x = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Flatten()(x)
    validity = layers.Dense(1, activation='sigmoid')(x)
    return tf.keras.Model([img, label], validity)
# Build and compile the Conditional GAN
latent_dim = 100  # length of the noise vector fed to the generator
num_classes = 10  # number of class labels the models are conditioned on
img_shape = (28, 28, 1)
generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# The discriminator is compiled on its own first; the training loop trains it
# directly through discriminator.train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Freeze the discriminator before wiring it into the combined model so that
# training `cgan` is meant to update only the generator.
# NOTE(review): keeping the compile call above this line relies on Keras
# capturing `trainable` at compile time, so the standalone discriminator
# stays trainable. Confirm this holds for the Keras version in use.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
# The same label input conditions both the generator and the discriminator.
generated_img = generator([noise, label])
validity = discriminator([generated_img, label])
# Combined model: maps (noise, label) to the discriminator's score for the
# image generated from them.
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')
# Load MNIST; only the training images and their digit labels are used
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
# Scale pixels to [-1, 1]
x_train = x_train.astype(np.float32)
x_train = (x_train - 127.5) / 127.5
# Append a trailing channel axis
x_train = x_train[..., np.newaxis]
# Training parameters
epochs = 10000  # iterations of the training loop (one batch per iteration)
batch_size = 64
sample_interval = 1000  # progress is reported every this many iterations

# Target labels are the same for every step, so build them once.
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop. Each "epoch" here is one training step on a single random
# batch, not a full pass over the dataset.
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    # Class labels are shaped (batch, 1) to match the models' label input,
    # consistent with the sampling code below.
    real_labels = y_train[idx].reshape(-1, 1)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, (batch_size, 1))
    # verbose=0 keeps predict from printing a progress bar on every step.
    fake_images = generator.predict([noise, fake_labels], verbose=0)
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], real_targets)
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], fake_targets)
    # Element-wise average of the [loss, accuracy] pairs from both batches.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator: it is rewarded when the (frozen) discriminator
    # labels its images as real for the requested class.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, (batch_size, 1))
    g_loss = cgan.train_on_batch([noise, sampled_labels], real_targets)

    # Print progress and show samples every `sample_interval` steps
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display one image per class label 0..9
        noise = np.random.normal(0, 1, (10, latent_dim))
        sampled_labels = np.arange(0, 10).reshape(-1, 1)
        generated_images = generator.predict([noise, sampled_labels], verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()
Exercise 5: Evaluate a GAN Using Inception Score and FID
Task: Evaluate the performance of a trained GAN using Inception Score (IS) and Fréchet Inception Distance (FID) on generated images.
Solution:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0), eps=1e-16):
    """Compute the Inception Score (IS) of a set of images.

    The images are resized to 299x299 and classified by an
    ImageNet-pretrained InceptionV3. For each of ``num_splits`` consecutive
    slices of the predictions the score is ``exp(mean_x KL(p(y|x) || p(y)))``,
    where ``p(y)`` is the mean prediction of that slice.

    Args:
        images: Batch of RGB images, shape (N, H, W, 3), with pixel values in
            ``value_range``.
        num_splits: Number of consecutive slices the predictions are divided
            into; slices that end up empty are skipped.
        value_range: ``(low, high)`` pixel range of ``images``. The default
            (-1, 1) matches images normalized the way the dataset is
            normalized in this example.
        eps: Small constant added inside the logarithms so that zero
            probabilities do not produce NaN.

    Returns:
        Tuple ``(mean, std)`` of the per-slice scores.
    """
    # IS is defined on class probabilities p(y|x), so the softmax
    # classification head must be kept; pooled features are not a
    # probability distribution.
    model = InceptionV3(include_top=True, weights='imagenet')
    low, high = value_range
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; map the caller's range
    # there first, otherwise [-1, 1] inputs collapse to a near-constant image.
    images = (images - low) * (255.0 / (high - low))
    images = preprocess_input(images)
    preds = np.asarray(model.predict(images), dtype=np.float64)
    total = len(preds)
    scores = []
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Happens when there are fewer images than splits.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        # Per-image KL divergence between p(y|x) and the marginal p(y).
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)
# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Frechet Inception Distance (FID) between two image sets.

    Both sets are resized to 299x299 and passed through an
    ImageNet-pretrained InceptionV3 without its classification head
    (global-average-pooled features). The distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` where ``mu`` and
    ``S`` are the feature mean and covariance of each set.

    Args:
        real_images: Batch of real RGB images, shape (N, H, W, 3).
        generated_images: Batch of generated RGB images, shape (M, H', W', 3).
        value_range: ``(low, high)`` pixel range shared by both inputs. The
            default (-1, 1) matches images normalized the way the dataset is
            normalized in this example.

    Returns:
        The FID as a float; lower means the two feature distributions are
        closer.
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range

    def _activations(images):
        # Resize, map pixels to the [0, 255] range preprocess_input expects,
        # then extract the pooled Inception features.
        images = tf.image.resize(images, (299, 299))
        images = (images - low) * (255.0 / (high - low))
        return model.predict(preprocess_input(images))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return small imaginary parts from numerical error; drop them.
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
# Example: score a trained GAN against the CIFAR-10 training images
latent_dim = 100
img_shape = (32, 32, 3)
# Load CIFAR-10 (training images only) and scale pixels to [-1, 1]
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype(np.float32)
x_train = (x_train - 127.5) / 127.5
# `generator` is not defined in this snippet: it must already hold the
# trained GAN generator.
# Draw 1000 latent vectors and turn them into images
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_images = generator.predict(noise)
# Inception Score of the generated sample (mean and std over the splits)
inception_stats = calculate_inception_score(generated_images)
is_mean, is_std = inception_stats
print(f"Inception Score: {is_mean} ± {is_std}")
# FID between 1000 distinct random training images and the generated sample
real_idx = np.random.choice(x_train.shape[0], 1000, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
These exercises provide hands-on experience with building, training, and evaluating various types of GANs. By working through these exercises, you will deepen your understanding of GANs and their practical applications in different domains.