Menu icon
Aprendizaje Profundo Generativo Edición Actualizada

Capítulo 3: Profundizando en las Redes Generativas Antagónicas (GANs)

Ejercicios Prácticos para el Capítulo 3: Profundización en las Redes Generativas Antagónicas (GANs)

Estos ejercicios prácticos están diseñados para reforzar los conceptos cubiertos en este capítulo. Al trabajar en estos ejercicios, obtendrás experiencia práctica con GANs, incluyendo su arquitectura, entrenamiento, evaluación e innovaciones recientes.

Ejercicio 1: Construir y Entrenar un GAN Básico

Tarea: Construir y entrenar un GAN básico para generar imágenes en escala de grises de 28x28 similares al conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define the generator model
def build_generator(latent_dim):
    """Build the generator: latent vector -> 28x28x1 image with values in [-1, 1].

    A dense projection is reshaped into a 7x7x256 feature map, two stride-2
    transposed convolutions upsample it 7 -> 14 -> 28, and a final stride-1
    transposed convolution with tanh produces the single output channel.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the noise vector and fold it into a 7x7 feature map.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two upsampling stages, each doubling the spatial resolution.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Output head: one channel, squashed by tanh.
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net

# Define the discriminator model
def build_discriminator(img_shape):
    """Build the discriminator: image -> single sigmoid score.

    Args:
        img_shape: Shape of one input image; used as ``input_shape`` of the
            first convolution.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model ending in ``Dense(1, sigmoid)``.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Two stride-2 convolutions, each followed by a leaky ReLU.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Classification head.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Instantiate the GAN
# Hyperparameters shared by the generator and the discriminator.
latent_dim = 100
img_shape = (28, 28, 1)

generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# Compile the discriminator on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before building and compiling the stacked model.
# NOTE(review): this presumably relies on Keras fixing the set of trainable weights at
# compile() time, so the stacked model only updates the generator — confirm for the
# Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
# Stacked model: noise -> generator -> discriminator score.
gan = tf.keras.Model(gan_input, validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # append a trailing channel axis

# Training parameters
epochs = 10000  # number of training steps (one mini-batch each), not passes over the data
batch_size = 64
sample_interval = 1000  # report progress and preview samples every this many steps

for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator through the stacked model; the targets are all ones
    # because the generator is rewarded when its samples are scored as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples (nothing is written to disk)
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 2: Implementar y Evaluar un DCGAN

Tarea: Implementar un Deep Convolutional GAN (DCGAN) para generar imágenes RGB de 64x64. Evaluar el modelo usando la puntuación de Inception (IS) y la distancia de Fréchet Inception (FID).

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Build the DCGAN generator: latent vector -> 64x64x3 image with values in [-1, 1].

    The noise is projected to an 8x8x256 feature map and upsampled by three
    stride-2 transposed convolutions (8 -> 16 -> 32 -> 64); the last one has
    3 output channels and a tanh activation.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    # Dense projection folded into an 8x8 feature map.
    stack = [
        layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim),
        layers.Reshape((8, 8, 256)),
        layers.BatchNormalization(),
    ]

    # Intermediate upsampling stages.
    for filters in (128, 64):
        stack.append(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        stack.append(layers.BatchNormalization())
        stack.append(layers.LeakyReLU(alpha=0.2))

    # Final upsampling stage doubles as the RGB output head.
    stack.append(
        layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh')
    )
    return tf.keras.Sequential(stack)

# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator: image -> single sigmoid score.

    Three stride-2 convolutions (64, 128, 256 filters) with leaky ReLU; the
    second and third are batch-normalised before the activation.

    Args:
        img_shape: Shape of one input image; used as ``input_shape`` of the
            first convolution.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model ending in ``Dense(1, sigmoid)``.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stage has no batch normalisation.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Remaining downsampling stages: conv -> batch norm -> leaky ReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Training parameters
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000  # number of training steps (one mini-batch each)
batch_size = 64
sample_interval = 1000  # report progress every this many steps

# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# Compile the discriminator on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before building and compiling the stacked model.
# NOTE(review): this presumably relies on Keras fixing the set of trainable weights at
# compile() time, so the stacked model only updates the generator — confirm for the
# Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
# Stacked model: noise -> generator -> discriminator score.
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the dataset (e.g., CIFAR-10)
# NOTE(review): CIFAR-10 images are 32x32x3 while img_shape above is 64x64x3; the real
# images must be resized to img_shape before they are fed to the discriminator.
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Training loop
for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    # The discriminator was built for img_shape; upsample the real batch when the
    # dataset resolution differs (CIFAR-10 is 32x32, img_shape is 64x64). Resizing
    # per batch avoids holding a fully upsampled copy of the dataset in memory.
    if real_images.shape[1:3] != tuple(img_shape[:2]):
        real_images = tf.image.resize(real_images, img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator through the stacked model; the targets are all ones
    # because the generator is rewarded when its samples are scored as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples (nothing is written to disk)
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map the tanh range [-1, 1] back to 8-bit pixel values for display
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score of a batch of images.

    The score of one split is ``exp(mean_x KL(p(y|x) || p(y)))``, where
    ``p(y|x)`` is the class distribution predicted by the ImageNet-trained
    InceptionV3 classifier and ``p(y)`` is its mean over the split.

    Args:
        images: Batch of channels-last RGB images (InceptionV3 requires 3
            channels) with pixel values inside ``value_range``.
        num_splits: Number of contiguous groups the predictions are divided
            into; the score is computed per group.
        value_range: ``(min, max)`` of the pixel values in ``images``. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is exactly 0

    # The score is defined on class probabilities, so the softmax head is required
    # (include_top=False would return pooled features instead of a distribution).
    model = InceptionV3(include_top=True, weights='imagenet')

    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; bring the images to that range first.
    low, high = value_range
    images = (tf.cast(images, tf.float32) - low) * (255.0 / (high - low))
    images = preprocess_input(images)
    preds = np.asarray(model.predict(images, verbose=0), dtype=np.float64)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty split would only contribute NaN.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of channels-last RGB reference images.
        generated_images: Batch of channels-last RGB images to evaluate.
        value_range: ``(min, max)`` of the pixel values in both batches. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        The FID as a float (lower means the two sets are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range

    def _features(batch):
        # Resize to the Inception input size and embed the batch.
        batch = tf.image.resize(batch, (299, 299))
        # preprocess_input expects pixels in [0, 255]; bring the batch to that range first.
        batch = (tf.cast(batch, tf.float32) - low) * (255.0 / (high - low))
        return model.predict(preprocess_input(batch), verbose=0)

    act1 = _features(real_images)
    act2 = _features(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex result from numerical error; keep the real part.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))  # 1000 latent vectors
generated_images = generator.predict(noise)

# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# Calculate FID Score against 1000 distinct, randomly chosen training images
real_images = x_train[np.random.choice(x_train.shape[0], 1000, replace=False)]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Ejercicio 3: Implementar y Entrenar un CycleGAN

Tarea: Implementar y entrenar un CycleGAN para realizar la traducción de imágenes entre dos dominios, como traducir fotos a pinturas.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build a small encoder-decoder generator for image-to-image translation.

    Two stride-2 convolutions downsample the input by 4x and two stride-2
    transposed convolutions upsample it back; the output has 3 channels and
    a tanh activation.

    Args:
        img_shape: Shape of one input image.

    Returns:
        An uncompiled functional ``tf.keras.Model`` mapping image -> image.
    """
    layers = tf.keras.layers
    source = tf.keras.Input(shape=img_shape)

    # Encoder: conv -> leaky ReLU -> batch norm, twice.
    features = source
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    # Decoder: one intermediate upsampling stage, then the tanh output stage.
    features = layers.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same')(features)
    features = layers.LeakyReLU(alpha=0.2)(features)
    features = layers.BatchNormalization()(features)
    translated = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)

    return tf.keras.Model(source, translated)

# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a discriminator: image -> single sigmoid score.

    Args:
        img_shape: Shape of one input image.

    Returns:
        An uncompiled functional ``tf.keras.Model`` ending in ``Dense(1, sigmoid)``.
    """
    layers = tf.keras.layers
    source = tf.keras.Input(shape=img_shape)

    # Two stride-2 convolutions, each followed by a leaky ReLU.
    features = source
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    flat = layers.Flatten()(features)
    score = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(source, score)

# Build CycleGAN models
img_shape = (128, 128, 3)
# Two generators (one per translation direction) and one discriminator per domain.
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)

# Compile the discriminators on their own so they can be trained directly.
D_A.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
D_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute difference between ``y_true`` and ``y_pred``."""
    abs_error = tf.abs(tf.subtract(y_true, y_pred))
    return tf.reduce_mean(abs_error)

# Full CycleGAN model
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Forward cycle A -> B -> A and backward cycle B -> A -> B.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# Freeze both discriminators before compiling the combined model.
# NOTE(review): presumably so that only G_AB / G_BA are updated through cycle_gan —
# confirm that the Keras version in use honours `trainable` changes made after
# D_A / D_B were compiled.
D_A.trainable = False
D_B.trainable = False

valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Outputs: the two discriminator scores followed by the two reconstructions; the
# loss list passed to compile() is matched to them by position.
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])

# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()

# Training parameters
epochs = 10000  # number of training steps (one mini-batch each)
batch_size = 64
sample_interval = 1000  # report progress every this many steps

# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading
# NOTE: `...` (Ellipsis) is only a stand-in. The training loop reads `.shape[0]` and
# indexes these objects with integer arrays, so they must be replaced by image arrays
# before running (presumably of shape (N, *img_shape) and scaled to [-1, 1] to match
# the generators' tanh output — confirm).
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Training loop
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]

    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_B = G_AB.predict(real_A, verbose=0)
    fake_A = G_BA.predict(real_B, verbose=0)

    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    dA_loss_real = D_A.train_on_batch(real_A, np.ones((batch_size, 1)))
    dA_loss_fake = D_A.train_on_batch(fake_A, np.zeros((batch_size, 1)))
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)

    dB_loss_real = D_B.train_on_batch(real_B, np.ones((batch_size, 1)))
    dB_loss_fake = D_B.train_on_batch(fake_B, np.zeros((batch_size, 1)))
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators: adversarial targets are all ones, and the reconstruction
    # targets are the original images of each domain
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [np.ones((batch_size, 1)), np.ones((batch_size, 1)), real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Translate the current batch and display the first 10 results per direction
        # (nothing is written to disk)
        fake_B = G_AB.predict(real_A, verbose=0)
        fake_A = G_BA.predict(real_B, verbose=0)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # Generator outputs are tanh-activated (range [-1, 1]); imshow clips float
            # RGB data to [0, 1], so rescale before plotting
            axs[0, i].imshow((fake_B[i] + 1.0) / 2.0)
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] + 1.0) / 2.0)
            axs[1, i].axis('off')
        plt.show()

Ejercicio 4: Implementar un Conditional GAN (cGAN)

Tarea: Implementar un Conditional GAN (cGAN) para generar imágenes condicionadas por etiquetas de clase del conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, label) -> image with values in [-1, 1].

    The integer label is embedded into a ``latent_dim``-sized vector and
    multiplied element-wise with the noise; the product is projected to a
    7x7x256 feature map and upsampled 7 -> 14 -> 28.

    Args:
        latent_dim: Size of the input noise vector (and of the label embedding).
        num_classes: Number of distinct labels.
        img_shape: Target image shape; only its last entry (channel count) is used.

    Returns:
        An uncompiled functional ``tf.keras.Model`` with inputs ``[noise, label]``.
    """
    layers = tf.keras.layers
    noise_in = tf.keras.Input(shape=(latent_dim,))
    label_in = tf.keras.Input(shape=(1,), dtype='int32')

    # Condition the noise on the label.
    embedded = layers.Embedding(num_classes, latent_dim)(label_in)
    label_vector = layers.Flatten()(embedded)
    conditioned = layers.multiply([noise_in, label_vector])

    # Project and fold into a 7x7 feature map.
    features = layers.Dense(256 * 7 * 7, activation="relu")(conditioned)
    features = layers.Reshape((7, 7, 256))(features)
    features = layers.BatchNormalization()(features)

    # Two upsampling stages, each doubling the spatial resolution.
    for filters in (128, 64):
        features = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    image_out = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(features)

    return tf.keras.Model([noise_in, label_in], image_out)

# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, label) -> single sigmoid score.

    The integer label is embedded into a vector with as many entries as the
    image, reshaped to ``img_shape`` and multiplied element-wise with the
    image before the convolutional stack.

    Args:
        img_shape: Shape of one input image.
        num_classes: Number of distinct labels.

    Returns:
        An uncompiled functional ``tf.keras.Model`` with inputs ``[img, label]``.
    """
    layers = tf.keras.layers
    image_in = tf.keras.Input(shape=img_shape)
    label_in = tf.keras.Input(shape=(1,), dtype='int32')

    # Turn the label into an image-shaped mask and apply it to the image.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label_in)
    label_map = layers.Flatten()(embedded)
    label_map = layers.Reshape(img_shape)(label_map)
    conditioned = layers.multiply([image_in, label_map])

    # Two stride-2 convolutions, each followed by a leaky ReLU.
    features = conditioned
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    flat = layers.Flatten()(features)
    score = layers.Dense(1, activation='sigmoid')(flat)

    return tf.keras.Model([image_in, label_in], score)

# Build and compile the Conditional GAN
latent_dim = 100
num_classes = 10
img_shape = (28, 28, 1)

generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# Compile the discriminator on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before building and compiling the stacked model.
# NOTE(review): this presumably relies on Keras fixing the set of trainable weights at
# compile() time, so the stacked model only updates the generator — confirm for the
# Keras version in use.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
# The same label conditions both the generator and the discriminator.
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # append a trailing channel axis

# Training parameters
epochs = 10000  # number of training steps (one mini-batch each)
batch_size = 64
sample_interval = 1000  # report progress every this many steps

# Training loop
for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch.
    # Labels are shaped (batch, 1) to match the models' Input(shape=(1,)),
    # the same layout the sampling code below uses.
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    real_labels = y_train[idx].reshape(-1, 1)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size).reshape(-1, 1)
    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_images = generator.predict([noise, fake_labels], verbose=0)
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], np.zeros((batch_size, 1)))
    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator through the stacked model; the targets are all ones
    # because the generator is rewarded when its samples are scored as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size).reshape(-1, 1)
    g_loss = cgan.train_on_batch([noise, sampled_labels], np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display one sample per label 0..9 (nothing is written to disk)
        noise = np.random.normal(0, 1, (10, latent_dim))
        sampled_labels = np.arange(0, 10).reshape(-1, 1)
        generated_images = generator.predict([noise, sampled_labels], verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 5: Evaluar un GAN Usando Inception Score y FID

Tarea: Evaluar el rendimiento de un GAN entrenado utilizando Inception Score (IS) y Fréchet Inception Distance (FID) en imágenes generadas.

Solución:

import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score of a batch of images.

    The score of one split is ``exp(mean_x KL(p(y|x) || p(y)))``, where
    ``p(y|x)`` is the class distribution predicted by the ImageNet-trained
    InceptionV3 classifier and ``p(y)`` is its mean over the split.

    Args:
        images: Batch of channels-last RGB images (InceptionV3 requires 3
            channels) with pixel values inside ``value_range``.
        num_splits: Number of contiguous groups the predictions are divided
            into; the score is computed per group.
        value_range: ``(min, max)`` of the pixel values in ``images``. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is exactly 0

    # The score is defined on class probabilities, so the softmax head is required
    # (include_top=False would return pooled features instead of a distribution).
    model = InceptionV3(include_top=True, weights='imagenet')

    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; bring the images to that range first.
    low, high = value_range
    images = (tf.cast(images, tf.float32) - low) * (255.0 / (high - low))
    images = preprocess_input(images)
    preds = np.asarray(model.predict(images, verbose=0), dtype=np.float64)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty split would only contribute NaN.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of channels-last RGB reference images.
        generated_images: Batch of channels-last RGB images to evaluate.
        value_range: ``(min, max)`` of the pixel values in both batches. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        The FID as a float (lower means the two sets are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range

    def _features(batch):
        # Resize to the Inception input size and embed the batch.
        batch = tf.image.resize(batch, (299, 299))
        # preprocess_input expects pixels in [0, 255]; bring the batch to that range first.
        batch = (tf.cast(batch, tf.float32) - low) * (255.0 / (high - low))
        return model.predict(preprocess_input(batch), verbose=0)

    act1 = _features(real_images)
    act2 = _features(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex result from numerical error; keep the real part.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Example: Evaluate a trained GAN on CIFAR-10 dataset
latent_dim = 100
img_shape = (32, 32, 3)  # not referenced again in this snippet

# Load CIFAR-10 dataset
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Assume generator is the trained GAN generator
# NOTE: `generator` is not defined in this snippet; it must already exist and accept
# noise of shape (N, latent_dim).
# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))  # 1000 latent vectors
generated_images = generator.predict(noise)

# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# Calculate FID Score against 1000 distinct, randomly chosen training images
real_images = x_train[np.random.choice(x_train.shape[0], 1000, replace=False)]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Estos ejercicios brindan experiencia práctica en la construcción, el entrenamiento y la evaluación de varios tipos de GANs. Al trabajar a través de estos ejercicios, profundizarás tu comprensión de los GANs y sus aplicaciones prácticas en diferentes dominios.

Ejercicios Prácticos para el Capítulo 3: Profundización en las Redes Generativas Antagónicas (GANs)

Estos ejercicios prácticos están diseñados para reforzar los conceptos cubiertos en este capítulo. Al trabajar en estos ejercicios, obtendrás experiencia práctica con GANs, incluyendo su arquitectura, entrenamiento, evaluación e innovaciones recientes.

Ejercicio 1: Construir y Entrenar un GAN Básico

Tarea: Construir y entrenar un GAN básico para generar imágenes en escala de grises de 28x28 similares al conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define the generator model
def build_generator(latent_dim):
    """Build the generator: latent vector -> 28x28x1 image with values in [-1, 1].

    A dense projection is reshaped into a 7x7x256 feature map, two stride-2
    transposed convolutions upsample it 7 -> 14 -> 28, and a final stride-1
    transposed convolution with tanh produces the single output channel.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the noise vector and fold it into a 7x7 feature map.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two upsampling stages, each doubling the spatial resolution.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Output head: one channel, squashed by tanh.
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net

# Define the discriminator model
def build_discriminator(img_shape):
    """Build the discriminator: image -> single sigmoid score.

    Args:
        img_shape: Shape of one input image; used as ``input_shape`` of the
            first convolution.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model ending in ``Dense(1, sigmoid)``.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Two stride-2 convolutions, each followed by a leaky ReLU.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Classification head.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Instantiate the GAN
# Hyperparameters shared by the generator and the discriminator.
latent_dim = 100
img_shape = (28, 28, 1)

generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# Compile the discriminator on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before building and compiling the stacked model.
# NOTE(review): this presumably relies on Keras fixing the set of trainable weights at
# compile() time, so the stacked model only updates the generator — confirm for the
# Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
# Stacked model: noise -> generator -> discriminator score.
gan = tf.keras.Model(gan_input, validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
x_train = np.expand_dims(x_train, axis=-1)  # append a trailing channel axis

# Training parameters
epochs = 10000  # number of training steps (one mini-batch each), not passes over the data
batch_size = 64
sample_interval = 1000  # report progress and preview samples every this many steps

for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator through the stacked model; the targets are all ones
    # because the generator is rewarded when its samples are scored as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples (nothing is written to disk)
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 2: Implementar y Evaluar un DCGAN

Tarea: Implementar un Deep Convolutional GAN (DCGAN) para generar imágenes RGB de 64x64. Evaluar el modelo usando la puntuación de Inception (IS) y la distancia de Fréchet Inception (FID).

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Build the DCGAN generator: latent vector -> 64x64x3 image with values in [-1, 1].

    The noise is projected to an 8x8x256 feature map and upsampled by three
    stride-2 transposed convolutions (8 -> 16 -> 32 -> 64); the last one has
    3 output channels and a tanh activation.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    # Dense projection folded into an 8x8 feature map.
    stack = [
        layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim),
        layers.Reshape((8, 8, 256)),
        layers.BatchNormalization(),
    ]

    # Intermediate upsampling stages.
    for filters in (128, 64):
        stack.append(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        stack.append(layers.BatchNormalization())
        stack.append(layers.LeakyReLU(alpha=0.2))

    # Final upsampling stage doubles as the RGB output head.
    stack.append(
        layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh')
    )
    return tf.keras.Sequential(stack)

# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator: image -> single sigmoid score.

    Three stride-2 convolutions (64, 128, 256 filters) with leaky ReLU; the
    second and third are batch-normalised before the activation.

    Args:
        img_shape: Shape of one input image; used as ``input_shape`` of the
            first convolution.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model ending in ``Dense(1, sigmoid)``.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stage has no batch normalisation.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Remaining downsampling stages: conv -> batch norm -> leaky ReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Training parameters
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000  # number of training steps (one mini-batch each)
batch_size = 64
sample_interval = 1000  # report progress every this many steps

# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# Compile the discriminator on its own so it can be trained directly with train_on_batch.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before building and compiling the stacked model.
# NOTE(review): this presumably relies on Keras fixing the set of trainable weights at
# compile() time, so the stacked model only updates the generator — confirm for the
# Keras version in use.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
# Stacked model: noise -> generator -> discriminator score.
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the dataset (e.g., CIFAR-10)
# NOTE(review): CIFAR-10 images are 32x32x3 while img_shape above is 64x64x3; the real
# images must be resized to img_shape before they are fed to the discriminator.
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Training loop
for epoch in range(epochs):
    # Train the discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    # The discriminator was built for img_shape; upsample the real batch when the
    # dataset resolution differs (CIFAR-10 is 32x32, img_shape is 64x64). Resizing
    # per batch avoids holding a fully upsampled copy of the dataset in memory.
    if real_images.shape[1:3] != tuple(img_shape[:2]):
        real_images = tf.image.resize(real_images, img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: predict() would otherwise print a progress bar on every step
    fake_images = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Each result is [loss, accuracy]; average the real and fake halves element-wise
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator through the stacked model; the targets are all ones
    # because the generator is rewarded when its samples are scored as real
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display a row of 10 samples (nothing is written to disk)
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise, verbose=0)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map the tanh range [-1, 1] back to 8-bit pixel values for display
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10, value_range=(-1.0, 1.0)):
    """Compute the Inception Score of a batch of images.

    The score of one split is ``exp(mean_x KL(p(y|x) || p(y)))``, where
    ``p(y|x)`` is the class distribution predicted by the ImageNet-trained
    InceptionV3 classifier and ``p(y)`` is its mean over the split.

    Args:
        images: Batch of channels-last RGB images (InceptionV3 requires 3
            channels) with pixel values inside ``value_range``.
        num_splits: Number of contiguous groups the predictions are divided
            into; the score is computed per group.
        value_range: ``(min, max)`` of the pixel values in ``images``. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        Tuple ``(mean, std)`` of the per-split scores.
    """
    eps = 1e-16  # keeps log() finite when a class probability is exactly 0

    # The score is defined on class probabilities, so the softmax head is required
    # (include_top=False would return pooled features instead of a distribution).
    model = InceptionV3(include_top=True, weights='imagenet')

    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixels in [0, 255]; bring the images to that range first.
    low, high = value_range
    images = (tf.cast(images, tf.float32) - low) * (255.0 / (high - low))
    images = preprocess_input(images)
    preds = np.asarray(model.predict(images, verbose=0), dtype=np.float64)

    scores = []
    total = len(preds)
    for i in range(num_splits):
        part = preds[i * total // num_splits: (i + 1) * total // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty split would only contribute NaN.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Fréchet Inception Distance between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head); the distance is
    ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` over the feature
    means and covariances.

    Args:
        real_images: Batch of channels-last RGB reference images.
        generated_images: Batch of channels-last RGB images to evaluate.
        value_range: ``(min, max)`` of the pixel values in both batches. The
            default ``(-1, 1)`` matches tanh generator outputs and data
            normalised with ``(x - 127.5) / 127.5``.

    Returns:
        The FID as a float (lower means the two sets are closer).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    low, high = value_range

    def _features(batch):
        # Resize to the Inception input size and embed the batch.
        batch = tf.image.resize(batch, (299, 299))
        # preprocess_input expects pixels in [0, 255]; bring the batch to that range first.
        batch = (tf.cast(batch, tf.float32) - low) * (255.0 / (high - low))
        return model.predict(preprocess_input(batch), verbose=0)

    act1 = _features(real_images)
    act2 = _features(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex result from numerical error; keep the real part.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))  # 1000 latent vectors
generated_images = generator.predict(noise)

# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# Calculate FID Score against 1000 distinct, randomly chosen training images
real_images = x_train[np.random.choice(x_train.shape[0], 1000, replace=False)]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Ejercicio 3: Implementar y Entrenar un CycleGAN

Tarea: Implementar y entrenar un CycleGAN para realizar la traducción de imágenes entre dos dominios, como traducir fotos a pinturas.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator for the CycleGAN.

    Two stride-2 convolutions downsample the input, then two stride-2
    transposed convolutions upsample it again; the last layer emits 3
    channels through a tanh activation.

    Args:
        img_shape: Shape of a single input image, passed to ``tf.keras.Input``.

    Returns:
        A ``tf.keras.Model`` mapping an input image to a translated image.
    """
    layers = tf.keras.layers
    source = tf.keras.Input(shape=img_shape)

    # Each stage is conv -> LeakyReLU -> BatchNorm; only the conv type and
    # filter count differ between stages.
    stages = (
        (layers.Conv2D, 64),
        (layers.Conv2D, 128),
        (layers.Conv2DTranspose, 64),
    )
    features = source
    for conv_cls, filters in stages:
        features = conv_cls(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    translated = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)
    return tf.keras.Model(source, translated)

# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a binary real/fake classifier for one CycleGAN image domain.

    Args:
        img_shape: Shape of a single input image, passed to ``tf.keras.Input``.

    Returns:
        A ``tf.keras.Model`` mapping an image to one sigmoid validity score.
    """
    layers = tf.keras.layers
    image = tf.keras.Input(shape=img_shape)

    # Two stride-2 convolutions, each followed by a LeakyReLU.
    hidden = image
    for filters in (64, 128):
        hidden = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    flat = layers.Flatten()(hidden)
    score = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(image, score)

# Build CycleGAN models
# Two generators and two discriminators are created, all for the same
# image shape. G_AB / G_BA are used below as the A->B and B->A translators,
# D_A / D_B as the per-domain real/fake classifiers.
img_shape = (128, 128, 3)
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)

# The discriminators are compiled on their own so they can be trained
# directly with train_on_batch; 'accuracy' makes train_on_batch return
# [loss, accuracy].
D_A.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
D_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Used as the cycle-consistency loss: it compares an original image with
    its reconstruction after a round trip through both generators.
    """
    absolute_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(absolute_error)

# Full CycleGAN model
# Symbolic inputs: one image from each domain.
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Forward cycle A -> B -> A and backward cycle B -> A -> B.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# Freeze the discriminators before they are wired into the combined model;
# this is meant to leave only the generators trainable through `cycle_gan`.
D_A.trainable = False
D_B.trainable = False

# Discriminator verdicts on the translated images.
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Four outputs: two adversarial scores and two reconstructions, matched
# position by position with the four losses passed to compile().
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])

# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading
# NOTE: both placeholders must be replaced by image arrays before running the
# loop; the generators end in tanh, so images are presumably expected to be
# scaled to [-1, 1] -- confirm when wiring in a real dataset.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Training loop
# Each iteration trains on one random batch per domain: first both
# discriminators (real vs. translated images), then both generators through
# the combined model.
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]

    fake_B = G_AB.predict(real_A)
    fake_A = G_BA.predict(real_B)

    # Real images are labelled 1, translated images 0.
    dA_loss_real = D_A.train_on_batch(real_A, np.ones((batch_size, 1)))
    dA_loss_fake = D_A.train_on_batch(fake_A, np.zeros((batch_size, 1)))
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)

    dB_loss_real = D_B.train_on_batch(real_B, np.ones((batch_size, 1)))
    dB_loss_fake = D_B.train_on_batch(fake_B, np.zeros((batch_size, 1)))
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators
    # Targets: "real" (1) for both adversarial outputs, and the original
    # images for the two reconstruction outputs.
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [np.ones((batch_size, 1)), np.ones((batch_size, 1)), real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save translated images
        fake_B = G_AB.predict(real_A)
        fake_A = G_BA.predict(real_B)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # The tanh outputs lie in [-1, 1]; imshow clips float RGB data to
            # [0, 1], so rescale to uint8 [0, 255] (same conversion as the
            # DCGAN exercise) to display the images correctly.
            axs[0, i].imshow((fake_B[i] * 127.5 + 127.5).astype(np.uint8))
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] * 127.5 + 127.5).astype(np.uint8))
            axs[1, i].axis('off')
        plt.show()

Ejercicio 4: Implementar un Conditional GAN (cGAN)

Tarea: Implementar un Conditional GAN (cGAN) para generar imágenes condicionadas por etiquetas de clase del conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, class label) -> image.

    The label is embedded into a ``latent_dim``-sized vector and multiplied
    element-wise with the noise vector; the product is then projected to a
    7x7x256 map and upsampled by two stride-2 transposed convolutions.

    Args:
        latent_dim: Length of the noise vector (and of the label embedding).
        num_classes: Number of distinct class labels.
        img_shape: Target image shape; only its last entry (channel count)
            is used here.

    Returns:
        A ``tf.keras.Model`` taking ``[noise, label]`` and returning an image
        produced by a tanh output layer.
    """
    layers = tf.keras.layers

    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Condition the noise on the label via an element-wise product.
    embedded = layers.Embedding(num_classes, latent_dim)(label)
    label_vector = layers.Flatten()(embedded)
    conditioned = layers.multiply([noise, label_vector])

    hidden = layers.Dense(256 * 7 * 7, activation="relu")(conditioned)
    hidden = layers.Reshape((7, 7, 256))(hidden)
    hidden = layers.BatchNormalization()(hidden)

    # Two upsampling stages: transposed conv -> BatchNorm -> LeakyReLU.
    for filters in (128, 64):
        hidden = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(hidden)

    return tf.keras.Model([noise, label], output_img)

# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, class label) -> validity.

    The label is embedded into a vector with as many entries as the image,
    reshaped to ``img_shape`` and multiplied element-wise with the image
    before the convolutional classifier.

    Args:
        img_shape: Shape of a single input image.
        num_classes: Number of distinct class labels.

    Returns:
        A ``tf.keras.Model`` taking ``[img, label]`` and returning one sigmoid
        validity score.
    """
    layers = tf.keras.layers

    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Turn the label into an image-shaped mask and apply it to the input.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label)
    label_mask = layers.Flatten()(embedded)
    label_mask = layers.Reshape(img_shape)(label_mask)
    conditioned = layers.multiply([img, label_mask])

    hidden = conditioned
    for filters in (64, 128):
        hidden = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    flat = layers.Flatten()(hidden)
    validity = layers.Dense(1, activation='sigmoid')(flat)

    return tf.keras.Model([img, label], validity)

# Build and compile the Conditional GAN
latent_dim = 100
num_classes = 10
img_shape = (28, 28, 1)

generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# Compiled on its own so it can be trained directly; 'accuracy' makes
# train_on_batch return [loss, accuracy].
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before wiring it into the combined model; this is
# meant to make training `cgan` update only the generator.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
# The same label conditions both the generator and the discriminator.
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# Add a trailing channel axis so images match img_shape.
x_train = np.expand_dims(x_train, axis=-1)

# Training parameters
# Note: `epochs` counts single-batch iterations of the loop below.
epochs = 10000
batch_size = 64
sample_interval = 1000

# Training loop
for epoch in range(epochs):
    # Train the discriminator
    # One random real batch (with its true labels) and one generated batch
    # (with randomly drawn labels).
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    real_labels = y_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size)
    fake_images = generator.predict([noise, fake_labels])
    # Real pairs are labelled 1, generated pairs 0.
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], np.zeros((batch_size, 1)))
    # Element-wise average of [loss, accuracy] over the two updates.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator
    # Fresh noise and labels; the target is "real" (1) for every sample.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    g_loss = cgan.train_on_batch([noise, sampled_labels], np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save images
        # One sample per class label 0..9, shown side by side.
        noise = np.random.normal(0, 1, (10, latent_dim))
        sampled_labels = np.arange(0, 10).reshape(-1, 1)
        generated_images = generator.predict([noise, sampled_labels])
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 5: Evaluar un GAN Usando Inception Score y FID

Tarea: Evaluar el rendimiento de un GAN entrenado utilizando Inception Score (IS) y Fréchet Inception Distance (FID) en imágenes generadas.

Solución:

import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score (IS) for a batch of images.

    For each of ``num_splits`` consecutive chunks of the batch the score is
    ``exp(mean_x KL(p(y|x) || p(y)))``, where ``p(y|x)`` is the InceptionV3
    class distribution of one image and ``p(y)`` is its mean over the chunk.

    Args:
        images: 4-D batch of RGB images. ``preprocess_input`` is applied
            here; per the Keras documentation it expects pixel values in
            [0, 255].
        num_splits: Number of chunks the batch is divided into.

    Returns:
        Tuple ``(mean, std)`` of the per-chunk scores.
    """
    # IS is defined on class probabilities, so the classifier head (softmax
    # over the ImageNet classes) is required. A headless, pooled model returns
    # feature activations instead, which are not a distribution and contain
    # zeros that turn the logarithm below into NaN.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    images = preprocess_input(images)
    preds = model.predict(images)

    eps = 1e-16  # keeps log() finite for probabilities that underflow to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty chunk has no score.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        # Per-image KL divergence between p(y|x) and the chunk marginal p(y).
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Return the Frechet Inception Distance between two image batches.

    Both batches are resized to 299x299, run through ``preprocess_input`` and
    embedded with a headless, average-pooled InceptionV3. The result is
    ``||mu_r - mu_g||^2 + Tr(S_r + S_g - 2 * sqrt(S_r . S_g))`` where ``mu``
    and ``S`` are the mean and covariance of each batch's activations.
    """
    def _prepare(batch):
        # Resize first, then apply the Inception-specific preprocessing.
        return preprocess_input(tf.image.resize(batch, (299, 299)))

    extractor = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    real_batch = _prepare(real_images)
    fake_batch = _prepare(generated_images)

    feats_real = extractor.predict(real_batch)
    feats_fake = extractor.predict(fake_batch)

    # First and second moments of the activations (one row per image).
    mean_real = feats_real.mean(axis=0)
    cov_real = np.cov(feats_real, rowvar=False)
    mean_fake = feats_fake.mean(axis=0)
    cov_fake = np.cov(feats_fake, rowvar=False)

    mean_term = np.sum((mean_real - mean_fake) ** 2.0)
    cov_sqrt = sqrtm(cov_real.dot(cov_fake))
    if np.iscomplexobj(cov_sqrt):
        # sqrtm can return a complex matrix; only its real part is used.
        cov_sqrt = cov_sqrt.real

    return mean_term + np.trace(cov_real + cov_fake - 2.0 * cov_sqrt)

# Example: Evaluate a trained GAN on CIFAR-10 dataset
latent_dim = 100
img_shape = (32, 32, 3)

# Load CIFAR-10 dataset
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Assume generator is the trained GAN generator
# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))
generated_images = generator.predict(noise)

# x_train is normalized to [-1, 1] above and the generator is assumed to
# produce the same range, but the metric functions apply the Inception
# `preprocess_input`, which per the Keras documentation expects pixel values
# in [0, 255]. Map the images back to that range before evaluating.
generated_pixels = (generated_images + 1.0) * 127.5

# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_pixels)
print(f"Inception Score: {is_mean} ± {is_std}")

# Calculate FID Score
real_images = x_train[np.random.choice(x_train.shape[0], 1000, replace=False)]
real_pixels = (real_images + 1.0) * 127.5
fid_score = calculate_fid(real_pixels, generated_pixels)
print(f"FID Score: {fid_score}")

Estos ejercicios brindan experiencia práctica en la construcción, el entrenamiento y la evaluación de varios tipos de GANs. Al trabajar a través de estos ejercicios, profundizarás tu comprensión de los GANs y sus aplicaciones prácticas en diferentes dominios.

Ejercicios Prácticos para el Capítulo 3: Profundización en las Redes Generativas Antagónicas (GANs)

Estos ejercicios prácticos están diseñados para reforzar los conceptos cubiertos en este capítulo. Al trabajar en estos ejercicios, obtendrás experiencia práctica con GANs, incluyendo su arquitectura, entrenamiento, evaluación e innovaciones recientes.

Ejercicio 1: Construir y Entrenar un GAN Básico

Tarea: Construir y entrenar un GAN básico para generar imágenes en escala de grises de 28x28 similares al conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define the generator model
def build_generator(latent_dim):
    """Build the basic GAN generator: latent vector -> 28x28x1 image.

    Args:
        latent_dim: Length of the input noise vector.

    Returns:
        A ``tf.keras.Sequential`` model whose last layer uses tanh.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and reshape it into a 7x7x256 feature map.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 7x7 -> 14x14 -> 28x28.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Single-channel output layer.
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net

# Define the discriminator model
def build_discriminator(img_shape):
    """Build the basic GAN discriminator: image -> sigmoid validity score.

    Args:
        img_shape: Shape of a single input image.

    Returns:
        A ``tf.keras.Sequential`` binary classifier.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Two stride-2 convolutions, each followed by a LeakyReLU.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Flatten and classify as real/fake.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Instantiate the GAN
latent_dim = 100
img_shape = (28, 28, 1)

generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# Compiled on its own so it can be trained directly; 'accuracy' makes
# train_on_batch return [loss, accuracy].
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before wiring it into the combined model; this is
# meant to make training `gan` update only the generator.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(gan_input, validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# Add a trailing channel axis so images match img_shape.
x_train = np.expand_dims(x_train, axis=-1)

# Training parameters
# Note: `epochs` counts single-batch iterations of the loop below.
epochs = 10000
batch_size = 64
sample_interval = 1000

for epoch in range(epochs):
    # Train the discriminator
    # One random real batch and one freshly generated batch.
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    # Real images are labelled 1, generated images 0.
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Element-wise average of [loss, accuracy] over the two updates.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator
    # Fresh noise; the target is "real" (1) for every generated sample.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save images
        # Ten samples shown side by side in grayscale.
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 2: Implementar y Evaluar un DCGAN

Tarea: Implementar un Deep Convolutional GAN (DCGAN) para generar imágenes RGB de 64x64. Evaluar el modelo usando la puntuación de Inception (IS) y la distancia de Fréchet Inception (FID).

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Build the DCGAN generator: latent vector -> 64x64x3 image.

    Args:
        latent_dim: Length of the input noise vector.

    Returns:
        A ``tf.keras.Sequential`` model whose last layer uses tanh.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and reshape it into an 8x8x256 feature map.
    net.add(layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((8, 8, 256)))
    net.add(layers.BatchNormalization())

    # Two stride-2 transposed convolutions: 8x8 -> 16x16 -> 32x32.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Final stride-2 upsampling to 64x64 with 3 output channels.
    net.add(layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return net

# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Build the DCGAN discriminator: image -> sigmoid validity score.

    Args:
        img_shape: Shape of a single input image.

    Returns:
        A ``tf.keras.Sequential`` binary classifier.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stride-2 convolution: no batch normalization on the input stage.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Two further stride-2 stages: conv -> BatchNorm -> LeakyReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Flatten and classify as real/fake.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Training parameters
# Note: `epochs` counts single-batch iterations of the training loop.
latent_dim = 100
img_shape = (64, 64, 3)
epochs = 10000
batch_size = 64
sample_interval = 1000

# Instantiate the DCGAN
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# Compiled on its own so it can be trained directly; 'accuracy' makes
# train_on_batch return [loss, accuracy].
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before wiring it into the combined model; this is
# meant to make training `dcgan` update only the generator.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
dcgan = tf.keras.Model(gan_input, validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the dataset (e.g., CIFAR-10)
# NOTE(review): CIFAR-10 images are 32x32 while img_shape is 64x64 -- real
# batches need resizing before they are fed to the discriminator.
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]

# Training loop
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save images
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score (IS) for a batch of images.

    For each of ``num_splits`` consecutive chunks of the batch the score is
    ``exp(mean_x KL(p(y|x) || p(y)))``, where ``p(y|x)`` is the InceptionV3
    class distribution of one image and ``p(y)`` is its mean over the chunk.

    Args:
        images: 4-D batch of RGB images. ``preprocess_input`` is applied
            here; per the Keras documentation it expects pixel values in
            [0, 255].
        num_splits: Number of chunks the batch is divided into.

    Returns:
        Tuple ``(mean, std)`` of the per-chunk scores.
    """
    # IS is defined on class probabilities, so the classifier head (softmax
    # over the ImageNet classes) is required. A headless, pooled model returns
    # feature activations instead, which are not a distribution and contain
    # zeros that turn the logarithm below into NaN.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    images = preprocess_input(images)
    preds = model.predict(images)

    eps = 1e-16  # keeps log() finite for probabilities that underflow to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty chunk has no score.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        # Per-image KL divergence between p(y|x) and the chunk marginal p(y).
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Return the Frechet Inception Distance between two image batches.

    Both batches are resized to 299x299, run through ``preprocess_input`` and
    embedded with a headless, average-pooled InceptionV3. The result is
    ``||mu_r - mu_g||^2 + Tr(S_r + S_g - 2 * sqrt(S_r . S_g))`` where ``mu``
    and ``S`` are the mean and covariance of each batch's activations.
    """
    def _prepare(batch):
        # Resize first, then apply the Inception-specific preprocessing.
        return preprocess_input(tf.image.resize(batch, (299, 299)))

    extractor = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))
    real_batch = _prepare(real_images)
    fake_batch = _prepare(generated_images)

    feats_real = extractor.predict(real_batch)
    feats_fake = extractor.predict(fake_batch)

    # First and second moments of the activations (one row per image).
    mean_real = feats_real.mean(axis=0)
    cov_real = np.cov(feats_real, rowvar=False)
    mean_fake = feats_fake.mean(axis=0)
    cov_fake = np.cov(feats_fake, rowvar=False)

    mean_term = np.sum((mean_real - mean_fake) ** 2.0)
    cov_sqrt = sqrtm(cov_real.dot(cov_fake))
    if np.iscomplexobj(cov_sqrt):
        # sqrtm can return a complex matrix; only its real part is used.
        cov_sqrt = cov_sqrt.real

    return mean_term + np.trace(cov_real + cov_fake - 2.0 * cov_sqrt)

# Generate some fake images using the trained GAN generator
noise = np.random.normal(0, 1, (1000, latent_dim))
generated_images = generator.predict(noise)

# Both the generator output (tanh) and x_train are in [-1, 1], but the metric
# functions apply the Inception `preprocess_input`, which per the Keras
# documentation expects pixel values in [0, 255]. Map the images back to
# that range before evaluating.
generated_pixels = (generated_images + 1.0) * 127.5

# Calculate Inception Score
is_mean, is_std = calculate_inception_score(generated_pixels)
print(f"Inception Score: {is_mean} ± {is_std}")

# Calculate FID Score
real_images = x_train[np.random.choice(x_train.shape[0], 1000, replace=False)]
real_pixels = (real_images + 1.0) * 127.5
fid_score = calculate_fid(real_pixels, generated_pixels)
print(f"FID Score: {fid_score}")

Ejercicio 3: Implementar y Entrenar un CycleGAN

Tarea: Implementar y entrenar un CycleGAN para realizar la traducción de imágenes entre dos dominios, como traducir fotos a pinturas.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Build an image-to-image generator for the CycleGAN.

    Two stride-2 convolutions downsample the input, then two stride-2
    transposed convolutions upsample it again; the last layer emits 3
    channels through a tanh activation.

    Args:
        img_shape: Shape of a single input image, passed to ``tf.keras.Input``.

    Returns:
        A ``tf.keras.Model`` mapping an input image to a translated image.
    """
    layers = tf.keras.layers
    source = tf.keras.Input(shape=img_shape)

    # Each stage is conv -> LeakyReLU -> BatchNorm; only the conv type and
    # filter count differ between stages.
    stages = (
        (layers.Conv2D, 64),
        (layers.Conv2D, 128),
        (layers.Conv2DTranspose, 64),
    )
    features = source
    for conv_cls, filters in stages:
        features = conv_cls(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    translated = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)
    return tf.keras.Model(source, translated)

# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Build a binary real/fake classifier for one CycleGAN image domain.

    Args:
        img_shape: Shape of a single input image, passed to ``tf.keras.Input``.

    Returns:
        A ``tf.keras.Model`` mapping an image to one sigmoid validity score.
    """
    layers = tf.keras.layers
    image = tf.keras.Input(shape=img_shape)

    # Two stride-2 convolutions, each followed by a LeakyReLU.
    hidden = image
    for filters in (64, 128):
        hidden = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    flat = layers.Flatten()(hidden)
    score = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(image, score)

# Build CycleGAN models
# Two generators and two discriminators are created, all for the same
# image shape. G_AB / G_BA are used below as the A->B and B->A translators,
# D_A / D_B as the per-domain real/fake classifiers.
img_shape = (128, 128, 3)
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)

# The discriminators are compiled on their own so they can be trained
# directly with train_on_batch; 'accuracy' makes train_on_batch return
# [loss, accuracy].
D_A.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
D_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# CycleGAN loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Used as the cycle-consistency loss: it compares an original image with
    its reconstruction after a round trip through both generators.
    """
    absolute_error = tf.abs(y_true - y_pred)
    return tf.reduce_mean(absolute_error)

# Full CycleGAN model
# Symbolic inputs: one image from each domain.
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Forward cycle A -> B -> A and backward cycle B -> A -> B.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# Freeze the discriminators before they are wired into the combined model;
# this is meant to leave only the generators trainable through `cycle_gan`.
D_A.trainable = False
D_B.trainable = False

# Discriminator verdicts on the translated images.
valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Four outputs: two adversarial scores and two reconstructions, matched
# position by position with the four losses passed to compile().
cycle_gan = tf.keras.Model(inputs=[img_A, img_B], outputs=[valid_A, valid_B, reconstr_A, reconstr_B])
cycle_gan.compile(optimizer='adam', loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss])

# Summary of the models
G_AB.summary()
G_BA.summary()
D_A.summary()
D_B.summary()
cycle_gan.summary()

# Training parameters
epochs = 10000
batch_size = 64
sample_interval = 1000

# Load and preprocess the dataset (e.g., two image domains such as photos and paintings)
# Placeholder code for dataset loading
# NOTE: both placeholders must be replaced by image arrays before running the
# loop; the generators end in tanh, so images are presumably expected to be
# scaled to [-1, 1] -- confirm when wiring in a real dataset.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Training loop
# Each iteration trains on one random batch per domain: first both
# discriminators (real vs. translated images), then both generators through
# the combined model.
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]

    fake_B = G_AB.predict(real_A)
    fake_A = G_BA.predict(real_B)

    # Real images are labelled 1, translated images 0.
    dA_loss_real = D_A.train_on_batch(real_A, np.ones((batch_size, 1)))
    dA_loss_fake = D_A.train_on_batch(fake_A, np.zeros((batch_size, 1)))
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)

    dB_loss_real = D_B.train_on_batch(real_B, np.ones((batch_size, 1)))
    dB_loss_fake = D_B.train_on_batch(fake_B, np.zeros((batch_size, 1)))
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators
    # Targets: "real" (1) for both adversarial outputs, and the original
    # images for the two reconstruction outputs.
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [np.ones((batch_size, 1)), np.ones((batch_size, 1)), real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save translated images
        fake_B = G_AB.predict(real_A)
        fake_A = G_BA.predict(real_B)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # The tanh outputs lie in [-1, 1]; imshow clips float RGB data to
            # [0, 1], so rescale to uint8 [0, 255] (same conversion as the
            # DCGAN exercise) to display the images correctly.
            axs[0, i].imshow((fake_B[i] * 127.5 + 127.5).astype(np.uint8))
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] * 127.5 + 127.5).astype(np.uint8))
            axs[1, i].axis('off')
        plt.show()

Ejercicio 4: Implementar un Conditional GAN (cGAN)

Tarea: Implementar un Conditional GAN (cGAN) para generar imágenes condicionadas por etiquetas de clase del conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Build the conditional generator: (noise, class label) -> image.

    The label is embedded into a ``latent_dim``-sized vector and multiplied
    element-wise with the noise vector; the product is then projected to a
    7x7x256 map and upsampled by two stride-2 transposed convolutions.

    Args:
        latent_dim: Length of the noise vector (and of the label embedding).
        num_classes: Number of distinct class labels.
        img_shape: Target image shape; only its last entry (channel count)
            is used here.

    Returns:
        A ``tf.keras.Model`` taking ``[noise, label]`` and returning an image
        produced by a tanh output layer.
    """
    layers = tf.keras.layers

    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Condition the noise on the label via an element-wise product.
    embedded = layers.Embedding(num_classes, latent_dim)(label)
    label_vector = layers.Flatten()(embedded)
    conditioned = layers.multiply([noise, label_vector])

    hidden = layers.Dense(256 * 7 * 7, activation="relu")(conditioned)
    hidden = layers.Reshape((7, 7, 256))(hidden)
    hidden = layers.BatchNormalization()(hidden)

    # Two upsampling stages: transposed conv -> BatchNorm -> LeakyReLU.
    for filters in (128, 64):
        hidden = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(hidden)

    return tf.keras.Model([noise, label], output_img)

# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Build the conditional discriminator: (image, class label) -> validity.

    The label is embedded into a vector with as many entries as the image,
    reshaped to ``img_shape`` and multiplied element-wise with the image
    before the convolutional classifier.

    Args:
        img_shape: Shape of a single input image.
        num_classes: Number of distinct class labels.

    Returns:
        A ``tf.keras.Model`` taking ``[img, label]`` and returning one sigmoid
        validity score.
    """
    layers = tf.keras.layers

    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Turn the label into an image-shaped mask and apply it to the input.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label)
    label_mask = layers.Flatten()(embedded)
    label_mask = layers.Reshape(img_shape)(label_mask)
    conditioned = layers.multiply([img, label_mask])

    hidden = conditioned
    for filters in (64, 128):
        hidden = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(hidden)
        hidden = layers.LeakyReLU(alpha=0.2)(hidden)

    flat = layers.Flatten()(hidden)
    validity = layers.Dense(1, activation='sigmoid')(flat)

    return tf.keras.Model([img, label], validity)

# Build and compile the Conditional GAN
latent_dim = 100
num_classes = 10
img_shape = (28, 28, 1)

generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# Compiled on its own so it can be trained directly; 'accuracy' makes
# train_on_batch return [loss, accuracy].
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Freeze the discriminator before wiring it into the combined model; this is
# meant to make training `cgan` update only the generator.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
# The same label conditions both the generator and the discriminator.
validity = discriminator([generated_img, label])
cgan = tf.keras.Model([noise, label], validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load and preprocess the MNIST dataset
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5  # Normalize to [-1, 1]
# Add a trailing channel axis so images match img_shape.
x_train = np.expand_dims(x_train, axis=-1)

# Training parameters
# Note: `epochs` counts single-batch iterations of the loop below.
epochs = 10000
batch_size = 64
sample_interval = 1000

# Training loop
for epoch in range(epochs):
    # Train the discriminator
    # One random real batch (with its true labels) and one generated batch
    # (with randomly drawn labels).
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images = x_train[idx]
    real_labels = y_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size)
    fake_images = generator.predict([noise, fake_labels])
    # Real pairs are labelled 1, generated pairs 0.
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], np.zeros((batch_size, 1)))
    # Element-wise average of [loss, accuracy] over the two updates.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator
    # Fresh noise and labels; the target is "real" (1) for every sample.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    g_loss = cgan.train_on_batch([noise, sampled_labels], np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and save images
        # One sample per class label 0..9, shown side by side.
        noise = np.random.normal(0, 1, (10, latent_dim))
        sampled_labels = np.arange(0, 10).reshape(-1, 1)
        generated_images = generator.predict([noise, sampled_labels])
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            axs[i].imshow(img.squeeze(), cmap='gray')
            axs[i].axis('off')
        plt.show()

Ejercicio 5: Evaluar un GAN Usando Inception Score y FID

Tarea: Evaluar el rendimiento de un GAN entrenado utilizando Inception Score (IS) y Fréchet Inception Distance (FID) en imágenes generadas.

Solución:

import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score (IS) for a batch of images.

    For each of ``num_splits`` consecutive chunks of the batch the score is
    ``exp(mean_x KL(p(y|x) || p(y)))``, where ``p(y|x)`` is the InceptionV3
    class distribution of one image and ``p(y)`` is its mean over the chunk.

    Args:
        images: 4-D batch of RGB images. ``preprocess_input`` is applied
            here; per the Keras documentation it expects pixel values in
            [0, 255].
        num_splits: Number of chunks the batch is divided into.

    Returns:
        Tuple ``(mean, std)`` of the per-chunk scores.
    """
    # IS is defined on class probabilities, so the classifier head (softmax
    # over the ImageNet classes) is required. A headless, pooled model returns
    # feature activations instead, which are not a distribution and contain
    # zeros that turn the logarithm below into NaN.
    model = InceptionV3(include_top=True, weights='imagenet')
    images = tf.image.resize(images, (299, 299))
    images = preprocess_input(images)
    preds = model.predict(images)

    eps = 1e-16  # keeps log() finite for probabilities that underflow to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        if len(part) == 0:
            # Fewer images than splits: an empty chunk has no score.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        # Per-image KL divergence between p(y|x) and the chunk marginal p(y).
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Frechet Inception Distance (FID) between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head) and each embedding cloud is summarized by its
    mean and covariance:

        FID = ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))

    Args:
        real_images: Array-like of shape (N, H, W, 3) with reference images.
        generated_images: Array-like of shape (M, H, W, 3) with generated
            images.
        Pixel values of either set may be in [0, 255] or in [-1, 1]; data
        containing negative values is treated as [-1, 1].

    Returns:
        The FID as a float (lower means the two sets are more similar).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(batch):
        # Resize, normalize and embed one image set.
        batch = np.asarray(batch, dtype=np.float32)
        # preprocess_input maps [0, 255] -> [-1, 1]; bring [-1, 1] data back
        # to [0, 255] first so it is not squashed into a band around -1.
        if batch.size and batch.min() < 0:
            batch = (batch + 1.0) * 127.5
        batch = tf.image.resize(batch, (299, 299))
        return model.predict(preprocess_input(batch))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex matrix; only the real part is kept.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Example: score a trained GAN against the CIFAR-10 training images
latent_dim = 100
img_shape = (32, 32, 3)

# CIFAR-10 training images, rescaled from [0, 255] to [-1, 1]
(x_train, _), _ = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5

# `generator` is expected to already hold the trained GAN generator.
# Sample 1000 latent vectors and map them to fake images.
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated sample
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID between 1000 distinct random training images and the generated sample
real_idx = np.random.choice(x_train.shape[0], 1000, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Estos ejercicios brindan experiencia práctica en la construcción, el entrenamiento y la evaluación de varios tipos de GANs. Al trabajar a través de estos ejercicios, profundizarás tu comprensión de los GANs y sus aplicaciones prácticas en diferentes dominios.

Ejercicios Prácticos para el Capítulo 3: Profundización en las Redes Generativas Antagónicas (GANs)

Estos ejercicios prácticos están diseñados para reforzar los conceptos cubiertos en este capítulo. Al trabajar en estos ejercicios, obtendrás experiencia práctica con GANs, incluyendo su arquitectura, entrenamiento, evaluación e innovaciones recientes.

Ejercicio 1: Construir y Entrenar un GAN Básico

Tarea: Construir y entrenar un GAN básico para generar imágenes en escala de grises de 28x28 similares al conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define the generator model
def build_generator(latent_dim):
    """Assemble the generator network.

    A dense projection of the latent vector is reshaped to a 7x7x256 feature
    map and upsampled by two stride-2 transposed convolutions (7 -> 14 -> 28);
    a final stride-1 transposed convolution with tanh yields one channel.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and reshape it into a small feature map.
    net.add(layers.Dense(256 * 7 * 7, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((7, 7, 256)))
    net.add(layers.BatchNormalization())

    # Two upsampling stages, each doubling the spatial resolution.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Single-channel output squashed to [-1, 1] by tanh.
    net.add(layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh'))
    return net

# Define the discriminator model
def build_discriminator(img_shape):
    """Assemble the discriminator network.

    Two stride-2 convolutions with LeakyReLU downsample the image; the
    flattened features feed a single sigmoid unit.

    Args:
        img_shape: Shape of one input image, e.g. ``(28, 28, 1)``.

    Returns:
        A ``tf.keras.Sequential`` model with one sigmoid output per image.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Conv2D(128, kernel_size=4, strides=2, padding='same'))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Classification head: one sigmoid output.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Build the generator, the discriminator and the stacked GAN
latent_dim = 100
img_shape = (28, 28, 1)

generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
# The discriminator is compiled on its own before it is frozen below.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Mark the discriminator non-trainable, then chain generator -> discriminator
# into the combined model used for the generator update.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
gan = tf.keras.Model(inputs=gan_input, outputs=validity)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# MNIST digits scaled from [0, 255] to [-1, 1], with a trailing channel axis
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5
x_train = x_train[..., np.newaxis]

# Training schedule
epochs, batch_size, sample_interval = 10000, 64, 1000

# Targets reused on every step: 1 for "real", 0 for "fake".
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

for epoch in range(epochs):
    # Discriminator step: one random real batch and one generated batch.
    batch_idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_batch = x_train[batch_idx]
    latent = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_batch = generator.predict(latent)
    d_loss_real = discriminator.train_on_batch(real_batch, real_targets)
    d_loss_fake = discriminator.train_on_batch(fake_batch, fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Generator step: fresh noise, labelled "real", through the stacked model.
    latent = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(latent, real_targets)

    # Only report on every `sample_interval`-th step.
    if epoch % sample_interval != 0:
        continue

    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

    # Show a row of 10 freshly generated digits.
    preview = generator.predict(np.random.normal(0, 1, (10, latent_dim)))
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for ax, img in zip(axs, preview):
        ax.imshow(img.squeeze(), cmap='gray')
        ax.axis('off')
    plt.show()

Ejercicio 2: Implementar y Evaluar un DCGAN

Tarea: Implementar un Deep Convolutional GAN (DCGAN) para generar imágenes RGB de 64x64. Evaluar el modelo usando la puntuación de Inception (IS) y la distancia de Fréchet Inception (FID).

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Define DCGAN generator model
def build_dcgan_generator(latent_dim):
    """Assemble the DCGAN generator.

    A dense projection of the latent vector is reshaped to 8x8x256 and
    upsampled by three stride-2 transposed convolutions (8 -> 16 -> 32 -> 64);
    the last one emits 3 channels through tanh.

    Args:
        latent_dim: Size of the input noise vector.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # Project the latent vector and reshape it into a small feature map.
    net.add(layers.Dense(256 * 8 * 8, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((8, 8, 256)))
    net.add(layers.BatchNormalization())

    # Two normalized upsampling stages.
    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Final upsampling to a 3-channel image in [-1, 1].
    net.add(layers.Conv2DTranspose(3, kernel_size=4, strides=2, padding='same', activation='tanh'))
    return net

# Define DCGAN discriminator model
def build_dcgan_discriminator(img_shape):
    """Assemble the DCGAN discriminator.

    Three stride-2 convolutions (64, 128, 256 filters) with LeakyReLU; batch
    normalization follows the second and third convolution. The flattened
    features feed a single sigmoid unit.

    Args:
        img_shape: Shape of one input image, e.g. ``(64, 64, 3)``.

    Returns:
        A ``tf.keras.Sequential`` model with one sigmoid output per image.
    """
    layers = tf.keras.layers
    net = tf.keras.Sequential()

    # First stage: no batch normalization on the raw image.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, padding='same', input_shape=img_shape))
    net.add(layers.LeakyReLU(alpha=0.2))

    # Deeper stages: convolution, batch normalization, LeakyReLU.
    for filters in (128, 256):
        net.add(layers.Conv2D(filters, kernel_size=4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU(alpha=0.2))

    # Classification head: one sigmoid output.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Hyperparameters and training schedule
latent_dim = 100
img_shape = (64, 64, 3)
epochs, batch_size, sample_interval = 10000, 64, 1000

# Build the DCGAN generator and discriminator
generator = build_dcgan_generator(latent_dim)
discriminator = build_dcgan_discriminator(img_shape)
# The discriminator is compiled on its own before it is frozen below.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Mark the discriminator non-trainable, then chain generator -> discriminator
# into the combined model used for the generator update.
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_img = generator(gan_input)
validity = discriminator(generated_img)
dcgan = tf.keras.Model(inputs=gan_input, outputs=validity)
dcgan.compile(optimizer='adam', loss='binary_crossentropy')

# Load the dataset (e.g., CIFAR-10) and scale pixels from [0, 255] to [-1, 1]
(x_train, _), _ = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5

# Training loop: alternate one discriminator update and one generator update
# per step, and show a row of samples every `sample_interval` steps.
for epoch in range(epochs):
    # Train the discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    # CIFAR-10 images are 32x32, but the discriminator is built for
    # `img_shape`; resize the real batch so it matches the generator output.
    # Resizing per batch avoids holding an upscaled copy of the whole dataset.
    real_images = tf.image.resize(x_train[idx], img_shape[:2]).numpy()
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_images = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size, 1)))
    # Average [loss, accuracy] over the real and the fake batch.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator: fresh noise labelled "real" through the stacked model
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = dcgan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display 10 sample images
        noise = np.random.normal(0, 1, (10, latent_dim))
        generated_images = generator.predict(noise)
        fig, axs = plt.subplots(1, 10, figsize=(20, 2))
        for i, img in enumerate(generated_images):
            # Map tanh output from [-1, 1] back to displayable uint8 [0, 255].
            axs[i].imshow((img * 127.5 + 127.5).astype(np.uint8))
            axs[i].axis('off')
        plt.show()

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score (IS) of a set of images.

    IS = exp(E_x[KL(p(y|x) || p(y))]), where p(y|x) is the class
    distribution predicted by an ImageNet-trained InceptionV3 classifier and
    p(y) is its average over a split of the data.

    Args:
        images: Array-like of shape (N, H, W, 3). Pixel values may be in
            [0, 255] or in [-1, 1] (e.g. the output of a tanh generator);
            data containing negative values is treated as [-1, 1].
        num_splits: Number of consecutive groups the predictions are divided
            into; the score is computed per group.

    Returns:
        Tuple ``(mean, std)`` of the per-group scores.
    """
    # IS is defined over class probabilities, so the softmax classification
    # head must be kept (pooled features do not form a distribution).
    model = InceptionV3(include_top=True, weights='imagenet')

    images = np.asarray(images, dtype=np.float32)
    # preprocess_input maps [0, 255] -> [-1, 1]; bring [-1, 1] data back to
    # [0, 255] first so it is not squashed into a tiny band around -1.
    if images.size and images.min() < 0:
        images = (images + 1.0) * 127.5
    images = tf.image.resize(images, (299, 299))
    images = preprocess_input(images)
    preds = model.predict(images)

    eps = 1e-16  # keeps log() finite when a probability underflows to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        if len(part) == 0:
            # Fewer images than splits: nothing to score in this group.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Frechet Inception Distance (FID) between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head) and each embedding cloud is summarized by its
    mean and covariance:

        FID = ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))

    Args:
        real_images: Array-like of shape (N, H, W, 3) with reference images.
        generated_images: Array-like of shape (M, H, W, 3) with generated
            images.
        Pixel values of either set may be in [0, 255] or in [-1, 1]; data
        containing negative values is treated as [-1, 1].

    Returns:
        The FID as a float (lower means the two sets are more similar).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(batch):
        # Resize, normalize and embed one image set.
        batch = np.asarray(batch, dtype=np.float32)
        # preprocess_input maps [0, 255] -> [-1, 1]; bring [-1, 1] data back
        # to [0, 255] first so it is not squashed into a band around -1.
        if batch.size and batch.min() < 0:
            batch = (batch + 1.0) * 127.5
        batch = tf.image.resize(batch, (299, 299))
        return model.predict(preprocess_input(batch))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex matrix; only the real part is kept.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Sample 1000 latent vectors and map them to fake images with the generator
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated sample
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID between 1000 distinct random training images and the generated sample
real_idx = np.random.choice(x_train.shape[0], 1000, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Ejercicio 3: Implementar y Entrenar un CycleGAN

Tarea: Implementar y entrenar un CycleGAN para realizar la traducción de imágenes entre dos dominios, como traducir fotos a pinturas.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define CycleGAN generator model
def build_cyclegan_generator(img_shape):
    """Assemble an image-to-image generator.

    Two stride-2 convolutions downsample the input, then two stride-2
    transposed convolutions upsample it again; the last one emits 3 channels
    through tanh. Every hidden stage is followed by LeakyReLU and then batch
    normalization.

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        A functional ``tf.keras.Model`` mapping an image to an image.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    # (layer class, filters) for the hidden stages, in order.
    hidden_stages = (
        (layers.Conv2D, 64),
        (layers.Conv2D, 128),
        (layers.Conv2DTranspose, 64),
    )

    features = input_img
    for conv_cls, filters in hidden_stages:
        features = conv_cls(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.BatchNormalization()(features)

    output_img = layers.Conv2DTranspose(
        3, kernel_size=4, strides=2, padding='same', activation='tanh'
    )(features)
    return tf.keras.Model(input_img, output_img)

# Define CycleGAN discriminator model
def build_cyclegan_discriminator(img_shape):
    """Assemble a per-domain discriminator.

    Two stride-2 convolutions with LeakyReLU, then the flattened features
    feed a single sigmoid unit.

    Args:
        img_shape: Shape of one input image, e.g. ``(128, 128, 3)``.

    Returns:
        A functional ``tf.keras.Model`` with one sigmoid output per image.
    """
    layers = tf.keras.layers
    input_img = tf.keras.Input(shape=img_shape)

    features = input_img
    for filters in (64, 128):
        features = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)

    flat = layers.Flatten()(features)
    validity = layers.Dense(1, activation='sigmoid')(flat)
    return tf.keras.Model(input_img, validity)

# Instantiate the two generators (A->B, B->A) and one discriminator per domain
img_shape = (128, 128, 3)
G_AB = build_cyclegan_generator(img_shape)
G_BA = build_cyclegan_generator(img_shape)
D_A = build_cyclegan_discriminator(img_shape)
D_B = build_cyclegan_discriminator(img_shape)

# Both discriminators are compiled as standalone binary classifiers.
D_A.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
D_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Cycle-consistency loss
def cycle_loss(y_true, y_pred):
    """Return the mean absolute difference between ``y_true`` and ``y_pred``."""
    abs_diff = tf.abs(y_true - y_pred)
    return tf.reduce_mean(abs_diff)

# Combined CycleGAN model used for the generator update
img_A = tf.keras.Input(shape=img_shape)
img_B = tf.keras.Input(shape=img_shape)

# Translate each image to the other domain and back again.
fake_B = G_AB(img_A)
reconstr_A = G_BA(fake_B)
fake_A = G_BA(img_B)
reconstr_B = G_AB(fake_A)

# Mark both discriminators non-trainable before wiring them into the
# combined model.
D_A.trainable = False
D_B.trainable = False

valid_A = D_A(fake_A)
valid_B = D_B(fake_B)

# Outputs: two adversarial predictions followed by the two reconstructions,
# matched one-to-one with the loss list below.
cycle_gan = tf.keras.Model(
    inputs=[img_A, img_B],
    outputs=[valid_A, valid_B, reconstr_A, reconstr_B],
)
cycle_gan.compile(
    optimizer='adam',
    loss=['binary_crossentropy', 'binary_crossentropy', cycle_loss, cycle_loss],
)

# Print a summary of every model
for net in (G_AB, G_BA, D_A, D_B, cycle_gan):
    net.summary()

# Training schedule
epochs, batch_size, sample_interval = 10000, 64, 1000

# Dataset placeholders: replace the `...` with the loaded images of the two
# domains (e.g., photos and paintings) before running the training loop.
domain_A = ...  # Load your domain A images
domain_B = ...  # Load your domain B images

# Training loop: per step, update both discriminators on real vs. translated
# images, then update both generators through the combined model.
for epoch in range(epochs):
    # Train the discriminators
    idx_A = np.random.randint(0, domain_A.shape[0], batch_size)
    idx_B = np.random.randint(0, domain_B.shape[0], batch_size)
    real_A = domain_A[idx_A]
    real_B = domain_B[idx_B]

    # Translate the current batches with the current generators.
    fake_B = G_AB.predict(real_A)
    fake_A = G_BA.predict(real_B)

    dA_loss_real = D_A.train_on_batch(real_A, np.ones((batch_size, 1)))
    dA_loss_fake = D_A.train_on_batch(fake_A, np.zeros((batch_size, 1)))
    # Average [loss, accuracy] over the real and the fake batch.
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)

    dB_loss_real = D_B.train_on_batch(real_B, np.ones((batch_size, 1)))
    dB_loss_fake = D_B.train_on_batch(fake_B, np.zeros((batch_size, 1)))
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)

    # Train the generators: adversarial targets are "real" (ones) and the
    # reconstruction targets are the original input images.
    g_loss = cycle_gan.train_on_batch([real_A, real_B], [np.ones((batch_size, 1)), np.ones((batch_size, 1)), real_A, real_B])

    # Print progress
    if epoch % sample_interval == 0:
        print(f"{epoch} [D_A loss: {dA_loss[0]}, acc.: {dA_loss[1] * 100}%] [D_B loss: {dB_loss[0]}, acc.: {dB_loss[1] * 100}%] [G loss: {g_loss}]")

        # Generate and display translated images (row 0: A->B, row 1: B->A)
        fake_B = G_AB.predict(real_A)
        fake_A = G_BA.predict(real_B)
        fig, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            # Generator output is tanh-scaled to [-1, 1]; imshow clips float
            # RGB data to [0, 1], so convert to uint8 [0, 255] for display.
            axs[0, i].imshow((fake_B[i] * 127.5 + 127.5).astype(np.uint8))
            axs[0, i].axis('off')
            axs[1, i].imshow((fake_A[i] * 127.5 + 127.5).astype(np.uint8))
            axs[1, i].axis('off')
        plt.show()

Ejercicio 4: Implementar un Conditional GAN (cGAN)

Tarea: Implementar un Conditional GAN (cGAN) para generar imágenes condicionadas por etiquetas de clase del conjunto de datos MNIST.

Solución:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define Conditional GAN generator model
def build_cgan_generator(latent_dim, num_classes, img_shape):
    """Assemble the label-conditioned generator.

    The integer label is embedded into a ``latent_dim``-sized vector and
    multiplied element-wise with the noise vector. The product is projected
    to a 7x7x256 feature map, upsampled by two stride-2 transposed
    convolutions, and mapped to ``img_shape[-1]`` channels through tanh.

    Args:
        latent_dim: Size of the input noise vector.
        num_classes: Number of distinct labels (embedding vocabulary size).
        img_shape: Shape of one output image; only the channel count
            ``img_shape[-1]`` is used here.

    Returns:
        A functional ``tf.keras.Model`` taking ``[noise, label]``.
    """
    layers = tf.keras.layers

    noise = tf.keras.Input(shape=(latent_dim,))
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Condition the noise on the label via an element-wise product.
    embedded = layers.Embedding(num_classes, latent_dim)(label)
    label_embedding = layers.Flatten()(embedded)
    model_input = layers.multiply([noise, label_embedding])

    # Project and reshape into a small feature map.
    x = layers.Dense(256 * 7 * 7, activation="relu")(model_input)
    x = layers.Reshape((7, 7, 256))(x)
    x = layers.BatchNormalization()(x)

    # Two upsampling stages, each doubling the spatial resolution.
    for filters in (128, 64):
        x = layers.Conv2DTranspose(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(alpha=0.2)(x)

    output_img = layers.Conv2DTranspose(
        img_shape[-1], kernel_size=4, strides=1, padding='same', activation='tanh'
    )(x)

    return tf.keras.Model([noise, label], output_img)

# Define Conditional GAN discriminator model
def build_cgan_discriminator(img_shape, num_classes):
    """Assemble the label-conditioned discriminator.

    The integer label is embedded into ``prod(img_shape)`` values, reshaped
    to ``img_shape`` and multiplied element-wise with the image. Two stride-2
    convolutions with LeakyReLU follow, then a single sigmoid unit.

    Args:
        img_shape: Shape of one input image, e.g. ``(28, 28, 1)``.
        num_classes: Number of distinct labels (embedding vocabulary size).

    Returns:
        A functional ``tf.keras.Model`` taking ``[img, label]``.
    """
    layers = tf.keras.layers

    img = tf.keras.Input(shape=img_shape)
    label = tf.keras.Input(shape=(1,), dtype='int32')

    # Turn the label into an image-shaped map and gate the image with it.
    embedded = layers.Embedding(num_classes, np.prod(img_shape))(label)
    label_embedding = layers.Flatten()(embedded)
    label_embedding = layers.Reshape(img_shape)(label_embedding)
    model_input = layers.multiply([img, label_embedding])

    x = model_input
    for filters in (64, 128):
        x = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)

    x = layers.Flatten()(x)
    validity = layers.Dense(1, activation='sigmoid')(x)

    return tf.keras.Model([img, label], validity)

# Hyperparameters for the Conditional GAN
latent_dim = 100
num_classes = 10
img_shape = (28, 28, 1)

generator = build_cgan_generator(latent_dim, num_classes, img_shape)
discriminator = build_cgan_discriminator(img_shape, num_classes)
# The discriminator is compiled on its own before it is frozen below.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Mark the discriminator non-trainable, then chain generator -> discriminator
# (both receive the same label) into the combined model.
discriminator.trainable = False
noise = tf.keras.Input(shape=(latent_dim,))
label = tf.keras.Input(shape=(1,), dtype='int32')
generated_img = generator([noise, label])
validity = discriminator([generated_img, label])
cgan = tf.keras.Model(inputs=[noise, label], outputs=validity)
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# MNIST digits and labels; pixels scaled from [0, 255] to [-1, 1] and given a
# trailing channel axis
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5
x_train = x_train[..., np.newaxis]

# Training schedule
epochs, batch_size, sample_interval = 10000, 64, 1000

# Targets reused on every step: 1 for "real", 0 for "fake".
real_targets = np.ones((batch_size, 1))
fake_targets = np.zeros((batch_size, 1))

# Training loop
for epoch in range(epochs):
    # Discriminator step: real (image, label) pairs vs. generated pairs.
    batch_idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_images, real_labels = x_train[batch_idx], y_train[batch_idx]
    latent = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_labels = np.random.randint(0, num_classes, batch_size)
    fake_images = generator.predict([latent, fake_labels])
    d_loss_real = discriminator.train_on_batch([real_images, real_labels], real_targets)
    d_loss_fake = discriminator.train_on_batch([fake_images, fake_labels], fake_targets)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Generator step: fresh noise and random labels, targeted as "real".
    latent = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    g_loss = cgan.train_on_batch([latent, sampled_labels], real_targets)

    # Only report on every `sample_interval`-th step.
    if epoch % sample_interval != 0:
        continue

    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {d_loss[1] * 100}%] [G loss: {g_loss}]")

    # Show one generated digit per class label 0..9.
    latent = np.random.normal(0, 1, (10, latent_dim))
    sampled_labels = np.arange(0, 10)[:, np.newaxis]
    generated_images = generator.predict([latent, sampled_labels])
    fig, axs = plt.subplots(1, 10, figsize=(20, 2))
    for ax, img in zip(axs, generated_images):
        ax.imshow(img.squeeze(), cmap='gray')
        ax.axis('off')
    plt.show()

Ejercicio 5: Evaluar un GAN Usando Inception Score y FID

Tarea: Evaluar el rendimiento de un GAN entrenado utilizando Inception Score (IS) y Fréchet Inception Distance (FID) en imágenes generadas.

Solución:

import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm

# Function to calculate Inception Score
def calculate_inception_score(images, num_splits=10):
    """Compute the Inception Score (IS) of a set of images.

    IS = exp(E_x[KL(p(y|x) || p(y))]), where p(y|x) is the class
    distribution predicted by an ImageNet-trained InceptionV3 classifier and
    p(y) is its average over a split of the data.

    Args:
        images: Array-like of shape (N, H, W, 3). Pixel values may be in
            [0, 255] or in [-1, 1] (e.g. the output of a tanh generator);
            data containing negative values is treated as [-1, 1].
        num_splits: Number of consecutive groups the predictions are divided
            into; the score is computed per group.

    Returns:
        Tuple ``(mean, std)`` of the per-group scores.
    """
    # IS is defined over class probabilities, so the softmax classification
    # head must be kept (pooled features do not form a distribution).
    model = InceptionV3(include_top=True, weights='imagenet')

    images = np.asarray(images, dtype=np.float32)
    # preprocess_input maps [0, 255] -> [-1, 1]; bring [-1, 1] data back to
    # [0, 255] first so it is not squashed into a tiny band around -1.
    if images.size and images.min() < 0:
        images = (images + 1.0) * 127.5
    images = tf.image.resize(images, (299, 299))
    images = preprocess_input(images)
    preds = model.predict(images)

    eps = 1e-16  # keeps log() finite when a probability underflows to 0
    scores = []
    for i in range(num_splits):
        part = preds[i * len(preds) // num_splits: (i + 1) * len(preds) // num_splits]
        if len(part) == 0:
            # Fewer images than splits: nothing to score in this group.
            continue
        py = np.mean(part, axis=0, keepdims=True)
        kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)
        scores.append(np.exp(np.mean(kl)))
    return np.mean(scores), np.std(scores)

# Function to calculate FID score
def calculate_fid(real_images, generated_images):
    """Compute the Frechet Inception Distance (FID) between two image sets.

    Both sets are embedded with InceptionV3 (global-average-pooled features,
    no classification head) and each embedding cloud is summarized by its
    mean and covariance:

        FID = ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))

    Args:
        real_images: Array-like of shape (N, H, W, 3) with reference images.
        generated_images: Array-like of shape (M, H, W, 3) with generated
            images.
        Pixel values of either set may be in [0, 255] or in [-1, 1]; data
        containing negative values is treated as [-1, 1].

    Returns:
        The FID as a float (lower means the two sets are more similar).
    """
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    def _activations(batch):
        # Resize, normalize and embed one image set.
        batch = np.asarray(batch, dtype=np.float32)
        # preprocess_input maps [0, 255] -> [-1, 1]; bring [-1, 1] data back
        # to [0, 255] first so it is not squashed into a band around -1.
        if batch.size and batch.min() < 0:
            batch = (batch + 1.0) * 127.5
        batch = tf.image.resize(batch, (299, 299))
        return model.predict(preprocess_input(batch))

    act1 = _activations(real_images)
    act2 = _activations(generated_images)

    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)

    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # sqrtm can return a complex matrix; only the real part is kept.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

# Example: score a trained GAN against the CIFAR-10 training images
latent_dim = 100
img_shape = (32, 32, 3)

# CIFAR-10 training images, rescaled from [0, 255] to [-1, 1]
(x_train, _), _ = tf.keras.datasets.cifar10.load_data()
x_train = (x_train.astype(np.float32) - 127.5) / 127.5

# `generator` is expected to already hold the trained GAN generator.
# Sample 1000 latent vectors and map them to fake images.
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_images = generator.predict(noise)

# Inception Score of the generated sample
is_mean, is_std = calculate_inception_score(generated_images)
print(f"Inception Score: {is_mean} ± {is_std}")

# FID between 1000 distinct random training images and the generated sample
real_idx = np.random.choice(x_train.shape[0], 1000, replace=False)
real_images = x_train[real_idx]
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")

Estos ejercicios brindan experiencia práctica en la construcción, el entrenamiento y la evaluación de varios tipos de GANs. Al trabajar a través de estos ejercicios, profundizarás tu comprensión de los GANs y sus aplicaciones prácticas en diferentes dominios.