Chapter 5: Exploring Variational Autoencoders (VAEs)
Practical Exercises
This section provides hands-on exercises to reinforce your understanding of Variational Autoencoders (VAEs) and their variants. Each exercise includes a problem statement and a solution with code examples where appropriate.
Exercise 1: Implement a Basic VAE
Problem Statement: Implement a basic Variational Autoencoder (VAE) using TensorFlow and Keras. Train the VAE on the MNIST dataset and visualize the reconstructed images.
Solution:
```python
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset (keep the labels; they are used later for the
# latent-space plot and for the conditional VAE)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Sampling layer using the reparameterization trick:
# z = mu + sigma * epsilon, with sigma = exp(0.5 * log_var)
class Sampling(Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Encoder network
def build_encoder(input_shape, latent_dim):
    inputs = Input(shape=input_shape)
    x = Dense(512, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z], name='encoder')

# Decoder network
def build_decoder(latent_dim, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model(latent_inputs, outputs, name='decoder')

# Define the input shape and latent dimension
input_shape = (784,)
latent_dim = 2

# Build the encoder and decoder
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(latent_dim, input_shape[0])

# Define the VAE model
inputs = Input(shape=input_shape)
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae')

# VAE loss = reconstruction loss + KL divergence.
# The KL term depends on the encoder outputs, so it is attached to the model
# with add_loss; the reconstruction term is passed to compile as usual.
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae.add_loss(K.mean(kl_loss))

def reconstruction_loss(y_true, y_pred):
    # Per-pixel binary cross-entropy, summed over the 784 pixels
    return input_shape[0] * tf.keras.losses.binary_crossentropy(y_true, y_pred)

# Compile the VAE model
vae.compile(optimizer='adam', loss=reconstruction_loss)

# Train the VAE model
vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Reconstruct images
def reconstruct_images(vae, x_test, n_samples=10):
    reconstructed_images = vae.predict(x_test[:n_samples])
    original_images = x_test[:n_samples].reshape((n_samples, 28, 28))
    reconstructed_images = reconstructed_images.reshape((n_samples, 28, 28))
    plt.figure(figsize=(10, 4))
    for i in range(n_samples):
        # Top row: originals; bottom row: reconstructions
        plt.subplot(2, n_samples, i + 1)
        plt.imshow(original_images[i], cmap='gray')
        plt.axis('off')
        plt.subplot(2, n_samples, n_samples + i + 1)
        plt.imshow(reconstructed_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Visualize the reconstructed images
reconstruct_images(vae, x_test)
```
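Once the basic VAE is trained, you can also generate completely new digits by sampling latent vectors from the standard normal prior and passing them through the decoder. The following is a minimal sketch that reuses the `decoder` and `latent_dim` defined above; the names `n_samples` and `random_latent_vectors` are introduced here only for illustration.

```python
# Sample latent vectors from the N(0, I) prior and decode them into new digits
# (a minimal sketch, reusing the trained `decoder` and `latent_dim` from above)
n_samples = 10
random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
generated_images = decoder.predict(random_latent_vectors).reshape((n_samples, 28, 28))

plt.figure(figsize=(10, 2))
for i in range(n_samples):
    plt.subplot(1, n_samples, i + 1)
    plt.imshow(generated_images[i], cmap='gray')
    plt.axis('off')
plt.show()
```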
Exercise 2: Implement a Beta-VAE
Problem Statement: Implement a Beta-VAE by modifying the basic VAE to include a beta hyperparameter. Train the Beta-VAE on the MNIST dataset and visualize the latent space.
Solution:
```python
# Beta-VAE: identical architecture, but the KL term is weighted by beta > 1,
# which encourages a more disentangled latent representation
beta = 4.0

beta_encoder = build_encoder(input_shape, latent_dim)
beta_decoder = build_decoder(latent_dim, input_shape[0])

inputs = Input(shape=input_shape)
z_mean, z_log_var, z = beta_encoder(inputs)
outputs = beta_decoder(z)
beta_vae = Model(inputs, outputs, name='beta_vae')

# Attach the beta-weighted KL divergence and compile with the reconstruction loss
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
beta_vae.add_loss(beta * K.mean(kl_loss))
beta_vae.compile(optimizer='adam', loss=reconstruction_loss)

# Train the Beta-VAE model
beta_vae.fit(x_train, x_train, epochs=50, batch_size=128, validation_data=(x_test, x_test))

# Visualize the latent space, coloring each point by its digit label
def plot_latent_space(encoder, x_test, y_test, n_samples=10000):
    z_mean, _, _ = encoder.predict(x_test)
    plt.figure(figsize=(10, 10))
    scatter = plt.scatter(z_mean[:n_samples, 0], z_mean[:n_samples, 1],
                          c=y_test[:n_samples], cmap='viridis')
    plt.colorbar(scatter)
    plt.xlabel('z[0]')
    plt.ylabel('z[1]')
    plt.title('Latent Space')
    plt.show()

# Plot the latent space of the Beta-VAE
plot_latent_space(beta_encoder, x_test, y_test)
```
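Because the latent space is two-dimensional, another useful way to inspect what the Beta-VAE has learned is to decode a regular grid of latent values and observe how the generated digits change along each axis. The sketch below assumes the `beta_decoder` trained above; the helper `plot_latent_grid` and its parameters are introduced here purely for illustration.

```python
# Decode a regular grid over the 2-D latent space to see what each latent
# dimension encodes (a minimal sketch using the trained `beta_decoder`)
def plot_latent_grid(decoder, n=15, digit_size=28, z_range=3.0):
    grid = np.linspace(-z_range, z_range, n)
    figure = np.zeros((digit_size * n, digit_size * n))
    for i, zi in enumerate(grid):       # varies z[1] down the rows
        for j, zj in enumerate(grid):   # varies z[0] across the columns
            z_sample = np.array([[zj, zi]])
            digit = decoder.predict(z_sample, verbose=0).reshape(digit_size, digit_size)
            figure[i * digit_size:(i + 1) * digit_size,
                   j * digit_size:(j + 1) * digit_size] = digit
    plt.figure(figsize=(8, 8))
    plt.imshow(figure, cmap='gray')
    plt.axis('off')
    plt.show()

plot_latent_grid(beta_decoder)
```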
Exercise 3: Implement a Conditional VAE (CVAE)
Problem Statement: Implement a Conditional Variational Autoencoder (CVAE) that is conditioned on the digit labels of the MNIST dataset. Train the CVAE and generate images conditioned on specific labels.
Solution:
```python
from tensorflow.keras.layers import Concatenate

# Encoder network for the CVAE: the label vector is concatenated with the features
def build_cvae_encoder(input_shape, num_classes, latent_dim):
    inputs = Input(shape=input_shape)
    labels = Input(shape=(num_classes,))
    x = Dense(512, activation='relu')(inputs)
    x = Concatenate()([x, labels])
    x = Dense(256, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    z = Sampling()([z_mean, z_log_var])
    return Model([inputs, labels], [z_mean, z_log_var, z], name='cvae_encoder')

# Decoder network for the CVAE: the label vector is concatenated with the latent code
def build_cvae_decoder(latent_dim, num_classes, output_shape):
    latent_inputs = Input(shape=(latent_dim,))
    labels = Input(shape=(num_classes,))
    x = Dense(256, activation='relu')(latent_inputs)
    x = Concatenate()([x, labels])
    x = Dense(512, activation='relu')(x)
    outputs = Dense(output_shape, activation='sigmoid')(x)
    return Model([latent_inputs, labels], outputs, name='cvae_decoder')

# Define the input shape, number of classes, and latent dimension
input_shape = (784,)
num_classes = 10
latent_dim = 2

# Build the encoder and decoder for the CVAE
cvae_encoder = build_cvae_encoder(input_shape, num_classes, latent_dim)
cvae_decoder = build_cvae_decoder(latent_dim, num_classes, input_shape[0])

# Define the Conditional VAE model
inputs = Input(shape=input_shape)
labels = Input(shape=(num_classes,))
z_mean, z_log_var, z = cvae_encoder([inputs, labels])
outputs = cvae_decoder([z, labels])
cvae = Model([inputs, labels], outputs, name='cvae')

# Attach the KL divergence and compile with the reconstruction loss,
# exactly as in the basic VAE
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
cvae.add_loss(K.mean(kl_loss))
cvae.compile(optimizer='adam', loss=reconstruction_loss)

# Prepare the labels for training (one-hot encoding of the MNIST labels)
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes)
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes)

# Train the CVAE model
cvae.fit([x_train, y_train_onehot], x_train, epochs=50, batch_size=128,
         validation_data=([x_test, y_test_onehot], x_test))

# Generate images conditioned on a specific label
def generate_conditioned_images(cvae_decoder, label, latent_dim, num_classes, n_samples=10):
    label_vector = tf.keras.utils.to_categorical([label] * n_samples, num_classes)
    random_latent_vectors = np.random.normal(size=(n_samples, latent_dim))
    generated_images = cvae_decoder.predict([random_latent_vectors, label_vector])
    generated_images = generated_images.reshape((n_samples, 28, 28))
    plt.figure(figsize=(10, 2))
    for i in range(n_samples):
        plt.subplot(1, n_samples, i + 1)
        plt.imshow(generated_images[i], cmap='gray')
        plt.axis('off')
    plt.show()

# Generate and visualize images conditioned on the digit '5'
generate_conditioned_images(cvae_decoder, 5, latent_dim, num_classes)
```
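The same trained decoder can also be used to visualize every digit class at once, which makes the effect of the conditioning label easy to inspect. This is a minimal sketch that reuses `cvae_decoder`, `num_classes`, and `latent_dim` from the solution above; the loop over labels and the `n_per_class` parameter are added here for illustration.

```python
# Generate a row of samples for each digit class 0-9 using the trained CVAE decoder
# (a minimal sketch; `n_per_class` is an illustrative parameter, not from the exercise)
n_per_class = 10
plt.figure(figsize=(10, 10))
for label in range(num_classes):
    label_vector = tf.keras.utils.to_categorical([label] * n_per_class, num_classes)
    z_samples = np.random.normal(size=(n_per_class, latent_dim))
    images = cvae_decoder.predict([z_samples, label_vector]).reshape((n_per_class, 28, 28))
    for i in range(n_per_class):
        plt.subplot(num_classes, n_per_class, label * n_per_class + i + 1)
        plt.imshow(images[i], cmap='gray')
        plt.axis('off')
plt.show()
```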
These exercises give you hands-on experience with Variational Autoencoders (VAEs) and their variants. By implementing a basic VAE, a Beta-VAE, and a Conditional VAE, you deepen your understanding of these models and their applications.
The exercises cover essential aspects of VAEs, including image generation, reconstruction, latent-space visualization, and conditional generation, which will help you apply these techniques to a wide range of generative modeling tasks.