Project: Visual Story Generator: GPT-4o + DALL·E image flow based on prompt narrative
3. Setup and Complete Code
Rather than presenting the code in fragmented pieces, below you'll find a comprehensive, production-ready Python script that incorporates everything you need to build this visual story generator.
The script includes robust error handling for API interactions, proper management of asynchronous operations, and reliable image file retrieval and storage. It demonstrates best practices for working with the OpenAI Assistants API, handling both text and image generation, and managing the flow of data between different components of the system. After the code, we'll break down each major section to understand how it works and why certain implementation choices were made.
Prerequisites:
1. Install libraries: pip install openai python-dotenv
2. Create a .env file in the same directory as the script, containing the line: OPENAI_API_KEY=your-api-key-here
# --- Visual Story Generator ---
# Uses OpenAI Assistants API (GPT-4o + DALL-E 3)
import os
import time
import datetime
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv
# --- Configuration ---
# Load environment variables from the .env file described in the prerequisites.
load_dotenv()
ASSISTANT_NAME = "Visual Storyteller"
ASSISTANT_MODEL = "gpt-4o"
# Instructions given to the assistant when it is first created.
ASSISTANT_INSTRUCTIONS = (
    "You are a visual storyteller. When a user gives you a story idea, "
    "break it into 4–6 scenes. For each scene, first write a paragraph vividly describing the scene. "
    "After describing the scene, explicitly call the image generation tool to create an image that matches that specific scene description. "
    "Ensure each scene description is followed by its corresponding image generation call."
)
POLLING_INTERVAL_S = 2  # How often to check run status (seconds)
RUN_TIMEOUT_S = 300  # Max time to wait for a run to complete (seconds)

# --- Initialize OpenAI Client ---
# On any failure the script stops with a non-zero exit status so that shells
# and calling processes can tell the run did not succeed. SystemExit is
# raised directly instead of calling exit(), which is a convenience added by
# the site module and, called without arguments, reports success (status 0).
try:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
    client = OpenAI(api_key=api_key)
    print("OpenAI client initialized.")
except ValueError as e:
    print(f"Configuration Error: {e}")
    raise SystemExit(1) from None
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    raise SystemExit(1) from None
# --- Helper Function: Create or Retrieve Assistant ---
def create_or_retrieve_assistant(client, name, instructions, model):
    """Return the assistant called *name*, creating it if none exists.

    An existing assistant is returned as-is; its instructions, model and
    tools are not compared with the arguments.

    Args:
        client: OpenAI client exposing ``beta.assistants.list`` and
            ``beta.assistants.create``.
        name: Assistant name to look for (exact match).
        instructions: Instructions used only when a new assistant is created.
        model: Model identifier used only when a new assistant is created.

    Returns:
        The matching or newly created assistant object, or ``None`` when the
        lookup or creation raised an error (the error is printed).
    """
    try:
        # Iterate the list response itself rather than its ``.data``: the
        # SDK's page object fetches follow-up pages on demand, so a match
        # beyond the first 100 assistants is found instead of a duplicate
        # being created.
        for assistant in client.beta.assistants.list(limit=100):
            if assistant.name == name:
                print(f"Retrieved existing assistant '{name}' (ID: {assistant.id})")
                return assistant
        # If not found, create a new one
        print(f"Creating new assistant '{name}'...")
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            # NOTE(review): the Assistants API reference lists only the
            # code_interpreter, file_search and function tool types; confirm
            # that "image_generation" is accepted by the API version in use.
            tools=[{"type": "image_generation"}],
        )
        print(f"Created new assistant '{name}' (ID: {assistant.id})")
        return assistant
    except OpenAIError as e:
        print(f"OpenAI API Error creating/retrieving assistant: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error creating/retrieving assistant: {e}")
        return None
# --- Helper Function: Poll Run Status ---
def poll_run_status(client, thread_id, run_id, timeout=RUN_TIMEOUT_S):
    """Poll an assistant run until it stops progressing or *timeout* elapses.

    Args:
        client: OpenAI client exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to watch.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The last retrieved run object once its status is ``completed``,
        ``requires_action`` or any other non-waiting status; ``None`` if
        polling timed out or an unexpected (non-API) error occurred.
    """
    # Statuses in which the run is still moving and polling should continue.
    # "cancelling" is transitional, so it is waited on rather than reported
    # as a final state.
    waiting_states = ("queued", "in_progress", "cancelling")
    # monotonic() is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
            print(f"Run ID: {run_id}, Status: {run_status.status}")
            if run_status.status == "completed":
                print("Run completed successfully.")
                return run_status
            if run_status.status == "requires_action":
                # The run is blocked until tool outputs are submitted, and
                # this function never submits any, so waiting longer cannot
                # help. Hand the run back so the caller can decide.
                print("Run is waiting for tool outputs (requires_action); stopping polling.")
                return run_status
            if run_status.status in waiting_states:
                time.sleep(POLLING_INTERVAL_S)
            else:  # failed, cancelled, expired, incomplete
                print(f"Run terminated with status: {run_status.status}")
                return run_status  # Return the final status object
        except OpenAIError as e:
            # API errors are treated as transient: retry until the deadline.
            print(f"OpenAI API Error polling run status: {e}")
            time.sleep(POLLING_INTERVAL_S)  # Wait before retrying
        except Exception as e:
            print(f"Unexpected error polling run status: {e}")
            return None  # Exit polling on unexpected error
    print("Run polling timed out.")
    return None
# --- Helper Function: Retrieve and Save Image File ---
def save_image_from_file_id(client, file_id, output_dir="story_images"):
    """Download the file *file_id* and write it to *output_dir* as a PNG.

    Args:
        client: OpenAI client exposing ``files.content``.
        file_id: ID of the file to download.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        Path of the written file
        (``<output_dir>/scene_<timestamp>_<file_id>.png``), or ``None`` if
        retrieval or writing failed (the error is printed).
    """
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        print(f"Retrieving content for file ID: {file_id}...")
        # files.content() returns a binary response whose read() yields the
        # raw bytes. The deprecated files.retrieve_content() returns text,
        # which has no read() and is unsuitable for image data.
        response = client.files.content(file_id)
        image_data = response.read()
        # The file ID is part of the name, so distinct files never collide
        # even when saved within the same second.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = os.path.join(output_dir, f"scene_{timestamp}_{file_id}.png")
        with open(output_filename, "wb") as f:
            f.write(image_data)
        print(f"Image saved successfully as '{output_filename}'")
        return output_filename
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving file content for {file_id}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error retrieving/saving file {file_id}: {e}")
        return None
# --- Main Execution Logic ---
def generate_visual_story(user_prompt):
    """Generate an illustrated story for *user_prompt* and print it.

    Steps: obtain the assistant, create a thread, post the prompt, start a
    run, poll until it finishes, then print every text block of the
    assistant's messages and save every image block via
    ``save_image_from_file_id``. Any failure is printed and ends the function
    early.

    Args:
        user_prompt: The story idea sent to the assistant as the user message.

    Returns:
        None. Output is printed and images are written to disk.
    """
    print("\n--- Starting Visual Story Generation ---")
    print(f"User Prompt: \"{user_prompt}\"")
    # 1. Get the Assistant
    assistant = create_or_retrieve_assistant(
        client, ASSISTANT_NAME, ASSISTANT_INSTRUCTIONS, ASSISTANT_MODEL
    )
    if not assistant:
        return
    # 2. Create a Thread
    try:
        thread = client.beta.threads.create()
        print(f"Created new thread (ID: {thread.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating thread: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating thread: {e}")
        return
    # 3. Add User Message to Thread
    try:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_prompt,
        )
        print("User message added to thread.")
    except OpenAIError as e:
        print(f"OpenAI API Error adding message: {e}")
        return
    except Exception as e:
        print(f"Unexpected error adding message: {e}")
        return
    # 4. Run the Assistant
    try:
        run = client.beta.threads.runs.create(
            assistant_id=assistant.id,
            thread_id=thread.id,
            # Instructions could be overridden here if needed
        )
        print(f"Assistant run initiated (Run ID: {run.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating run: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating run: {e}")
        return
    # 5. Poll for Run Completion
    final_run_status = poll_run_status(client, thread.id, run.id)
    if not final_run_status or final_run_status.status != "completed":
        print("Story generation did not complete successfully.")
        return
    # 6. Retrieve and Process Messages
    print("\n--- Retrieving Story Content ---")
    try:
        # Oldest to newest, so scenes print in story order.
        messages_response = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
        scene_count = 0
        # Iterate the response itself rather than ``.data`` so the SDK fetches
        # further pages when the thread holds more messages than one page.
        for msg in messages_response:
            if msg.role != "assistant":  # Process only assistant messages
                continue
            print(f"\nProcessing Assistant Message (ID: {msg.id})")
            for content_block in msg.content:
                if content_block.type == "text":
                    scene_count += 1  # Assume text block starts a new scene description
                    print(f"\n--- Scene {scene_count} Description ---")
                    print(content_block.text.value)
                elif content_block.type == "image_file":
                    # Labelled with the most recent text block's scene number.
                    print(f"\n--- Scene {scene_count} Image ---")
                    file_id = content_block.image_file.file_id
                    print(f"Image File ID: {file_id}")
                    # Retrieve and save the actual image
                    save_image_from_file_id(client, file_id)
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving messages: {e}")
    except Exception as e:
        print(f"Unexpected error retrieving messages: {e}")
    print("\n--- Visual Story Generation Finished ---")
# --- Run the Generator ---
if __name__ == "__main__":
    # Demo entry point. To try a different story, replace the active
    # assignment with one of the commented-out alternatives.
    # story_idea = "Write a sci-fi tale about an astronaut finding ancient ruins on Mars."
    # story_idea = "A lonely lighthouse keeper befriends a migrating whale."
    story_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    generate_visual_story(story_idea)
3. Setup and Complete Code
Rather than presenting the code in fragmented pieces, below you'll find a comprehensive, production-ready Python script that incorporates everything you need to build this visual story generator.
The script includes robust error handling for API interactions, proper management of asynchronous operations, and reliable image file retrieval and storage. It demonstrates best practices for working with the OpenAI Assistants API, handling both text and image generation, and managing the flow of data between different components of the system. After the code, we'll break down each major section to understand how it works and why certain implementation choices were made.
Prerequisites:
1. Install libraries: pip install openai python-dotenv
2. Create a .env file in the same directory as the script, containing the line: OPENAI_API_KEY=your-api-key-here
# --- Visual Story Generator ---
# Uses OpenAI Assistants API (GPT-4o + DALL-E 3)
import os
import time
import datetime
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv
# --- Configuration ---
# Load environment variables from the .env file described in the prerequisites.
load_dotenv()
ASSISTANT_NAME = "Visual Storyteller"
ASSISTANT_MODEL = "gpt-4o"
# Instructions given to the assistant when it is first created.
ASSISTANT_INSTRUCTIONS = (
    "You are a visual storyteller. When a user gives you a story idea, "
    "break it into 4–6 scenes. For each scene, first write a paragraph vividly describing the scene. "
    "After describing the scene, explicitly call the image generation tool to create an image that matches that specific scene description. "
    "Ensure each scene description is followed by its corresponding image generation call."
)
POLLING_INTERVAL_S = 2  # How often to check run status (seconds)
RUN_TIMEOUT_S = 300  # Max time to wait for a run to complete (seconds)

# --- Initialize OpenAI Client ---
# On any failure the script stops with a non-zero exit status so that shells
# and calling processes can tell the run did not succeed. SystemExit is
# raised directly instead of calling exit(), which is a convenience added by
# the site module and, called without arguments, reports success (status 0).
try:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
    client = OpenAI(api_key=api_key)
    print("OpenAI client initialized.")
except ValueError as e:
    print(f"Configuration Error: {e}")
    raise SystemExit(1) from None
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    raise SystemExit(1) from None
# --- Helper Function: Create or Retrieve Assistant ---
def create_or_retrieve_assistant(client, name, instructions, model):
    """Return the assistant called *name*, creating it if none exists.

    An existing assistant is returned as-is; its instructions, model and
    tools are not compared with the arguments.

    Args:
        client: OpenAI client exposing ``beta.assistants.list`` and
            ``beta.assistants.create``.
        name: Assistant name to look for (exact match).
        instructions: Instructions used only when a new assistant is created.
        model: Model identifier used only when a new assistant is created.

    Returns:
        The matching or newly created assistant object, or ``None`` when the
        lookup or creation raised an error (the error is printed).
    """
    try:
        # Iterate the list response itself rather than its ``.data``: the
        # SDK's page object fetches follow-up pages on demand, so a match
        # beyond the first 100 assistants is found instead of a duplicate
        # being created.
        for assistant in client.beta.assistants.list(limit=100):
            if assistant.name == name:
                print(f"Retrieved existing assistant '{name}' (ID: {assistant.id})")
                return assistant
        # If not found, create a new one
        print(f"Creating new assistant '{name}'...")
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            # NOTE(review): the Assistants API reference lists only the
            # code_interpreter, file_search and function tool types; confirm
            # that "image_generation" is accepted by the API version in use.
            tools=[{"type": "image_generation"}],
        )
        print(f"Created new assistant '{name}' (ID: {assistant.id})")
        return assistant
    except OpenAIError as e:
        print(f"OpenAI API Error creating/retrieving assistant: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error creating/retrieving assistant: {e}")
        return None
# --- Helper Function: Poll Run Status ---
def poll_run_status(client, thread_id, run_id, timeout=RUN_TIMEOUT_S):
    """Poll an assistant run until it stops progressing or *timeout* elapses.

    Args:
        client: OpenAI client exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to watch.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The last retrieved run object once its status is ``completed``,
        ``requires_action`` or any other non-waiting status; ``None`` if
        polling timed out or an unexpected (non-API) error occurred.
    """
    # Statuses in which the run is still moving and polling should continue.
    # "cancelling" is transitional, so it is waited on rather than reported
    # as a final state.
    waiting_states = ("queued", "in_progress", "cancelling")
    # monotonic() is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
            print(f"Run ID: {run_id}, Status: {run_status.status}")
            if run_status.status == "completed":
                print("Run completed successfully.")
                return run_status
            if run_status.status == "requires_action":
                # The run is blocked until tool outputs are submitted, and
                # this function never submits any, so waiting longer cannot
                # help. Hand the run back so the caller can decide.
                print("Run is waiting for tool outputs (requires_action); stopping polling.")
                return run_status
            if run_status.status in waiting_states:
                time.sleep(POLLING_INTERVAL_S)
            else:  # failed, cancelled, expired, incomplete
                print(f"Run terminated with status: {run_status.status}")
                return run_status  # Return the final status object
        except OpenAIError as e:
            # API errors are treated as transient: retry until the deadline.
            print(f"OpenAI API Error polling run status: {e}")
            time.sleep(POLLING_INTERVAL_S)  # Wait before retrying
        except Exception as e:
            print(f"Unexpected error polling run status: {e}")
            return None  # Exit polling on unexpected error
    print("Run polling timed out.")
    return None
# --- Helper Function: Retrieve and Save Image File ---
def save_image_from_file_id(client, file_id, output_dir="story_images"):
    """Download the file *file_id* and write it to *output_dir* as a PNG.

    Args:
        client: OpenAI client exposing ``files.content``.
        file_id: ID of the file to download.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        Path of the written file
        (``<output_dir>/scene_<timestamp>_<file_id>.png``), or ``None`` if
        retrieval or writing failed (the error is printed).
    """
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        print(f"Retrieving content for file ID: {file_id}...")
        # files.content() returns a binary response whose read() yields the
        # raw bytes. The deprecated files.retrieve_content() returns text,
        # which has no read() and is unsuitable for image data.
        response = client.files.content(file_id)
        image_data = response.read()
        # The file ID is part of the name, so distinct files never collide
        # even when saved within the same second.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = os.path.join(output_dir, f"scene_{timestamp}_{file_id}.png")
        with open(output_filename, "wb") as f:
            f.write(image_data)
        print(f"Image saved successfully as '{output_filename}'")
        return output_filename
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving file content for {file_id}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error retrieving/saving file {file_id}: {e}")
        return None
# --- Main Execution Logic ---
def generate_visual_story(user_prompt):
    """Generate an illustrated story for *user_prompt* and print it.

    Steps: obtain the assistant, create a thread, post the prompt, start a
    run, poll until it finishes, then print every text block of the
    assistant's messages and save every image block via
    ``save_image_from_file_id``. Any failure is printed and ends the function
    early.

    Args:
        user_prompt: The story idea sent to the assistant as the user message.

    Returns:
        None. Output is printed and images are written to disk.
    """
    print("\n--- Starting Visual Story Generation ---")
    print(f"User Prompt: \"{user_prompt}\"")
    # 1. Get the Assistant
    assistant = create_or_retrieve_assistant(
        client, ASSISTANT_NAME, ASSISTANT_INSTRUCTIONS, ASSISTANT_MODEL
    )
    if not assistant:
        return
    # 2. Create a Thread
    try:
        thread = client.beta.threads.create()
        print(f"Created new thread (ID: {thread.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating thread: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating thread: {e}")
        return
    # 3. Add User Message to Thread
    try:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_prompt,
        )
        print("User message added to thread.")
    except OpenAIError as e:
        print(f"OpenAI API Error adding message: {e}")
        return
    except Exception as e:
        print(f"Unexpected error adding message: {e}")
        return
    # 4. Run the Assistant
    try:
        run = client.beta.threads.runs.create(
            assistant_id=assistant.id,
            thread_id=thread.id,
            # Instructions could be overridden here if needed
        )
        print(f"Assistant run initiated (Run ID: {run.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating run: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating run: {e}")
        return
    # 5. Poll for Run Completion
    final_run_status = poll_run_status(client, thread.id, run.id)
    if not final_run_status or final_run_status.status != "completed":
        print("Story generation did not complete successfully.")
        return
    # 6. Retrieve and Process Messages
    print("\n--- Retrieving Story Content ---")
    try:
        # Oldest to newest, so scenes print in story order.
        messages_response = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
        scene_count = 0
        # Iterate the response itself rather than ``.data`` so the SDK fetches
        # further pages when the thread holds more messages than one page.
        for msg in messages_response:
            if msg.role != "assistant":  # Process only assistant messages
                continue
            print(f"\nProcessing Assistant Message (ID: {msg.id})")
            for content_block in msg.content:
                if content_block.type == "text":
                    scene_count += 1  # Assume text block starts a new scene description
                    print(f"\n--- Scene {scene_count} Description ---")
                    print(content_block.text.value)
                elif content_block.type == "image_file":
                    # Labelled with the most recent text block's scene number.
                    print(f"\n--- Scene {scene_count} Image ---")
                    file_id = content_block.image_file.file_id
                    print(f"Image File ID: {file_id}")
                    # Retrieve and save the actual image
                    save_image_from_file_id(client, file_id)
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving messages: {e}")
    except Exception as e:
        print(f"Unexpected error retrieving messages: {e}")
    print("\n--- Visual Story Generation Finished ---")
# --- Run the Generator ---
if __name__ == "__main__":
    # Demo entry point. To try a different story, replace the active
    # assignment with one of the commented-out alternatives.
    # story_idea = "Write a sci-fi tale about an astronaut finding ancient ruins on Mars."
    # story_idea = "A lonely lighthouse keeper befriends a migrating whale."
    story_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    generate_visual_story(story_idea)
3. Setup and Complete Code
Rather than presenting the code in fragmented pieces, below you'll find a comprehensive, production-ready Python script that incorporates everything you need to build this visual story generator.
The script includes robust error handling for API interactions, proper management of asynchronous operations, and reliable image file retrieval and storage. It demonstrates best practices for working with the OpenAI Assistants API, handling both text and image generation, and managing the flow of data between different components of the system. After the code, we'll break down each major section to understand how it works and why certain implementation choices were made.
Prerequisites:
1. Install libraries: pip install openai python-dotenv
2. Create a .env file in the same directory as the script, containing the line: OPENAI_API_KEY=your-api-key-here
# --- Visual Story Generator ---
# Uses OpenAI Assistants API (GPT-4o + DALL-E 3)
import os
import time
import datetime
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv
# --- Configuration ---
# Load environment variables from the .env file described in the prerequisites.
load_dotenv()
ASSISTANT_NAME = "Visual Storyteller"
ASSISTANT_MODEL = "gpt-4o"
# Instructions given to the assistant when it is first created.
ASSISTANT_INSTRUCTIONS = (
    "You are a visual storyteller. When a user gives you a story idea, "
    "break it into 4–6 scenes. For each scene, first write a paragraph vividly describing the scene. "
    "After describing the scene, explicitly call the image generation tool to create an image that matches that specific scene description. "
    "Ensure each scene description is followed by its corresponding image generation call."
)
POLLING_INTERVAL_S = 2  # How often to check run status (seconds)
RUN_TIMEOUT_S = 300  # Max time to wait for a run to complete (seconds)

# --- Initialize OpenAI Client ---
# On any failure the script stops with a non-zero exit status so that shells
# and calling processes can tell the run did not succeed. SystemExit is
# raised directly instead of calling exit(), which is a convenience added by
# the site module and, called without arguments, reports success (status 0).
try:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
    client = OpenAI(api_key=api_key)
    print("OpenAI client initialized.")
except ValueError as e:
    print(f"Configuration Error: {e}")
    raise SystemExit(1) from None
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    raise SystemExit(1) from None
# --- Helper Function: Create or Retrieve Assistant ---
def create_or_retrieve_assistant(client, name, instructions, model):
    """Return the assistant called *name*, creating it if none exists.

    An existing assistant is returned as-is; its instructions, model and
    tools are not compared with the arguments.

    Args:
        client: OpenAI client exposing ``beta.assistants.list`` and
            ``beta.assistants.create``.
        name: Assistant name to look for (exact match).
        instructions: Instructions used only when a new assistant is created.
        model: Model identifier used only when a new assistant is created.

    Returns:
        The matching or newly created assistant object, or ``None`` when the
        lookup or creation raised an error (the error is printed).
    """
    try:
        # Iterate the list response itself rather than its ``.data``: the
        # SDK's page object fetches follow-up pages on demand, so a match
        # beyond the first 100 assistants is found instead of a duplicate
        # being created.
        for assistant in client.beta.assistants.list(limit=100):
            if assistant.name == name:
                print(f"Retrieved existing assistant '{name}' (ID: {assistant.id})")
                return assistant
        # If not found, create a new one
        print(f"Creating new assistant '{name}'...")
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            # NOTE(review): the Assistants API reference lists only the
            # code_interpreter, file_search and function tool types; confirm
            # that "image_generation" is accepted by the API version in use.
            tools=[{"type": "image_generation"}],
        )
        print(f"Created new assistant '{name}' (ID: {assistant.id})")
        return assistant
    except OpenAIError as e:
        print(f"OpenAI API Error creating/retrieving assistant: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error creating/retrieving assistant: {e}")
        return None
# --- Helper Function: Poll Run Status ---
def poll_run_status(client, thread_id, run_id, timeout=RUN_TIMEOUT_S):
    """Poll an assistant run until it stops progressing or *timeout* elapses.

    Args:
        client: OpenAI client exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to watch.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The last retrieved run object once its status is ``completed``,
        ``requires_action`` or any other non-waiting status; ``None`` if
        polling timed out or an unexpected (non-API) error occurred.
    """
    # Statuses in which the run is still moving and polling should continue.
    # "cancelling" is transitional, so it is waited on rather than reported
    # as a final state.
    waiting_states = ("queued", "in_progress", "cancelling")
    # monotonic() is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
            print(f"Run ID: {run_id}, Status: {run_status.status}")
            if run_status.status == "completed":
                print("Run completed successfully.")
                return run_status
            if run_status.status == "requires_action":
                # The run is blocked until tool outputs are submitted, and
                # this function never submits any, so waiting longer cannot
                # help. Hand the run back so the caller can decide.
                print("Run is waiting for tool outputs (requires_action); stopping polling.")
                return run_status
            if run_status.status in waiting_states:
                time.sleep(POLLING_INTERVAL_S)
            else:  # failed, cancelled, expired, incomplete
                print(f"Run terminated with status: {run_status.status}")
                return run_status  # Return the final status object
        except OpenAIError as e:
            # API errors are treated as transient: retry until the deadline.
            print(f"OpenAI API Error polling run status: {e}")
            time.sleep(POLLING_INTERVAL_S)  # Wait before retrying
        except Exception as e:
            print(f"Unexpected error polling run status: {e}")
            return None  # Exit polling on unexpected error
    print("Run polling timed out.")
    return None
# --- Helper Function: Retrieve and Save Image File ---
def save_image_from_file_id(client, file_id, output_dir="story_images"):
    """Download the file *file_id* and write it to *output_dir* as a PNG.

    Args:
        client: OpenAI client exposing ``files.content``.
        file_id: ID of the file to download.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        Path of the written file
        (``<output_dir>/scene_<timestamp>_<file_id>.png``), or ``None`` if
        retrieval or writing failed (the error is printed).
    """
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        print(f"Retrieving content for file ID: {file_id}...")
        # files.content() returns a binary response whose read() yields the
        # raw bytes. The deprecated files.retrieve_content() returns text,
        # which has no read() and is unsuitable for image data.
        response = client.files.content(file_id)
        image_data = response.read()
        # The file ID is part of the name, so distinct files never collide
        # even when saved within the same second.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = os.path.join(output_dir, f"scene_{timestamp}_{file_id}.png")
        with open(output_filename, "wb") as f:
            f.write(image_data)
        print(f"Image saved successfully as '{output_filename}'")
        return output_filename
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving file content for {file_id}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error retrieving/saving file {file_id}: {e}")
        return None
# --- Main Execution Logic ---
def generate_visual_story(user_prompt):
    """Generate an illustrated story for *user_prompt* and print it.

    Steps: obtain the assistant, create a thread, post the prompt, start a
    run, poll until it finishes, then print every text block of the
    assistant's messages and save every image block via
    ``save_image_from_file_id``. Any failure is printed and ends the function
    early.

    Args:
        user_prompt: The story idea sent to the assistant as the user message.

    Returns:
        None. Output is printed and images are written to disk.
    """
    print("\n--- Starting Visual Story Generation ---")
    print(f"User Prompt: \"{user_prompt}\"")
    # 1. Get the Assistant
    assistant = create_or_retrieve_assistant(
        client, ASSISTANT_NAME, ASSISTANT_INSTRUCTIONS, ASSISTANT_MODEL
    )
    if not assistant:
        return
    # 2. Create a Thread
    try:
        thread = client.beta.threads.create()
        print(f"Created new thread (ID: {thread.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating thread: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating thread: {e}")
        return
    # 3. Add User Message to Thread
    try:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_prompt,
        )
        print("User message added to thread.")
    except OpenAIError as e:
        print(f"OpenAI API Error adding message: {e}")
        return
    except Exception as e:
        print(f"Unexpected error adding message: {e}")
        return
    # 4. Run the Assistant
    try:
        run = client.beta.threads.runs.create(
            assistant_id=assistant.id,
            thread_id=thread.id,
            # Instructions could be overridden here if needed
        )
        print(f"Assistant run initiated (Run ID: {run.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating run: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating run: {e}")
        return
    # 5. Poll for Run Completion
    final_run_status = poll_run_status(client, thread.id, run.id)
    if not final_run_status or final_run_status.status != "completed":
        print("Story generation did not complete successfully.")
        return
    # 6. Retrieve and Process Messages
    print("\n--- Retrieving Story Content ---")
    try:
        # Oldest to newest, so scenes print in story order.
        messages_response = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
        scene_count = 0
        # Iterate the response itself rather than ``.data`` so the SDK fetches
        # further pages when the thread holds more messages than one page.
        for msg in messages_response:
            if msg.role != "assistant":  # Process only assistant messages
                continue
            print(f"\nProcessing Assistant Message (ID: {msg.id})")
            for content_block in msg.content:
                if content_block.type == "text":
                    scene_count += 1  # Assume text block starts a new scene description
                    print(f"\n--- Scene {scene_count} Description ---")
                    print(content_block.text.value)
                elif content_block.type == "image_file":
                    # Labelled with the most recent text block's scene number.
                    print(f"\n--- Scene {scene_count} Image ---")
                    file_id = content_block.image_file.file_id
                    print(f"Image File ID: {file_id}")
                    # Retrieve and save the actual image
                    save_image_from_file_id(client, file_id)
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving messages: {e}")
    except Exception as e:
        print(f"Unexpected error retrieving messages: {e}")
    print("\n--- Visual Story Generation Finished ---")
# --- Run the Generator ---
if __name__ == "__main__":
    # Demo entry point. To try a different story, replace the active
    # assignment with one of the commented-out alternatives.
    # story_idea = "Write a sci-fi tale about an astronaut finding ancient ruins on Mars."
    # story_idea = "A lonely lighthouse keeper befriends a migrating whale."
    story_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    generate_visual_story(story_idea)
3. Setup and Complete Code
Rather than presenting the code in fragmented pieces, below you'll find a comprehensive, production-ready Python script that incorporates everything you need to build this visual story generator.
The script includes robust error handling for API interactions, proper management of asynchronous operations, and reliable image file retrieval and storage. It demonstrates best practices for working with the OpenAI Assistants API, handling both text and image generation, and managing the flow of data between different components of the system. After the code, we'll break down each major section to understand how it works and why certain implementation choices were made.
Prerequisites:
1. Install libraries: pip install openai python-dotenv
2. Create a .env file in the same directory as the script, containing the line: OPENAI_API_KEY=your-api-key-here
# --- Visual Story Generator ---
# Uses OpenAI Assistants API (GPT-4o + DALL-E 3)
import os
import time
import datetime
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv
# --- Configuration ---
# Load environment variables from the .env file described in the prerequisites.
load_dotenv()
ASSISTANT_NAME = "Visual Storyteller"
ASSISTANT_MODEL = "gpt-4o"
# Instructions given to the assistant when it is first created.
ASSISTANT_INSTRUCTIONS = (
    "You are a visual storyteller. When a user gives you a story idea, "
    "break it into 4–6 scenes. For each scene, first write a paragraph vividly describing the scene. "
    "After describing the scene, explicitly call the image generation tool to create an image that matches that specific scene description. "
    "Ensure each scene description is followed by its corresponding image generation call."
)
POLLING_INTERVAL_S = 2  # How often to check run status (seconds)
RUN_TIMEOUT_S = 300  # Max time to wait for a run to complete (seconds)

# --- Initialize OpenAI Client ---
# On any failure the script stops with a non-zero exit status so that shells
# and calling processes can tell the run did not succeed. SystemExit is
# raised directly instead of calling exit(), which is a convenience added by
# the site module and, called without arguments, reports success (status 0).
try:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
    client = OpenAI(api_key=api_key)
    print("OpenAI client initialized.")
except ValueError as e:
    print(f"Configuration Error: {e}")
    raise SystemExit(1) from None
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    raise SystemExit(1) from None
# --- Helper Function: Create or Retrieve Assistant ---
def create_or_retrieve_assistant(client, name, instructions, model):
    """Return the assistant called *name*, creating it if none exists.

    An existing assistant is returned as-is; its instructions, model and
    tools are not compared with the arguments.

    Args:
        client: OpenAI client exposing ``beta.assistants.list`` and
            ``beta.assistants.create``.
        name: Assistant name to look for (exact match).
        instructions: Instructions used only when a new assistant is created.
        model: Model identifier used only when a new assistant is created.

    Returns:
        The matching or newly created assistant object, or ``None`` when the
        lookup or creation raised an error (the error is printed).
    """
    try:
        # Iterate the list response itself rather than its ``.data``: the
        # SDK's page object fetches follow-up pages on demand, so a match
        # beyond the first 100 assistants is found instead of a duplicate
        # being created.
        for assistant in client.beta.assistants.list(limit=100):
            if assistant.name == name:
                print(f"Retrieved existing assistant '{name}' (ID: {assistant.id})")
                return assistant
        # If not found, create a new one
        print(f"Creating new assistant '{name}'...")
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            # NOTE(review): the Assistants API reference lists only the
            # code_interpreter, file_search and function tool types; confirm
            # that "image_generation" is accepted by the API version in use.
            tools=[{"type": "image_generation"}],
        )
        print(f"Created new assistant '{name}' (ID: {assistant.id})")
        return assistant
    except OpenAIError as e:
        print(f"OpenAI API Error creating/retrieving assistant: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error creating/retrieving assistant: {e}")
        return None
# --- Helper Function: Poll Run Status ---
def poll_run_status(client, thread_id, run_id, timeout=RUN_TIMEOUT_S):
    """Poll an assistant run until it stops progressing or *timeout* elapses.

    Args:
        client: OpenAI client exposing ``beta.threads.runs.retrieve``.
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to watch.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The last retrieved run object once its status is ``completed``,
        ``requires_action`` or any other non-waiting status; ``None`` if
        polling timed out or an unexpected (non-API) error occurred.
    """
    # Statuses in which the run is still moving and polling should continue.
    # "cancelling" is transitional, so it is waited on rather than reported
    # as a final state.
    waiting_states = ("queued", "in_progress", "cancelling")
    # monotonic() is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
            print(f"Run ID: {run_id}, Status: {run_status.status}")
            if run_status.status == "completed":
                print("Run completed successfully.")
                return run_status
            if run_status.status == "requires_action":
                # The run is blocked until tool outputs are submitted, and
                # this function never submits any, so waiting longer cannot
                # help. Hand the run back so the caller can decide.
                print("Run is waiting for tool outputs (requires_action); stopping polling.")
                return run_status
            if run_status.status in waiting_states:
                time.sleep(POLLING_INTERVAL_S)
            else:  # failed, cancelled, expired, incomplete
                print(f"Run terminated with status: {run_status.status}")
                return run_status  # Return the final status object
        except OpenAIError as e:
            # API errors are treated as transient: retry until the deadline.
            print(f"OpenAI API Error polling run status: {e}")
            time.sleep(POLLING_INTERVAL_S)  # Wait before retrying
        except Exception as e:
            print(f"Unexpected error polling run status: {e}")
            return None  # Exit polling on unexpected error
    print("Run polling timed out.")
    return None
# --- Helper Function: Retrieve and Save Image File ---
def save_image_from_file_id(client, file_id, output_dir="story_images"):
    """Download the file *file_id* and write it to *output_dir* as a PNG.

    Args:
        client: OpenAI client exposing ``files.content``.
        file_id: ID of the file to download.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        Path of the written file
        (``<output_dir>/scene_<timestamp>_<file_id>.png``), or ``None`` if
        retrieval or writing failed (the error is printed).
    """
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        print(f"Retrieving content for file ID: {file_id}...")
        # files.content() returns a binary response whose read() yields the
        # raw bytes. The deprecated files.retrieve_content() returns text,
        # which has no read() and is unsuitable for image data.
        response = client.files.content(file_id)
        image_data = response.read()
        # The file ID is part of the name, so distinct files never collide
        # even when saved within the same second.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = os.path.join(output_dir, f"scene_{timestamp}_{file_id}.png")
        with open(output_filename, "wb") as f:
            f.write(image_data)
        print(f"Image saved successfully as '{output_filename}'")
        return output_filename
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving file content for {file_id}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error retrieving/saving file {file_id}: {e}")
        return None
# --- Main Execution Logic ---
def generate_visual_story(user_prompt):
    """Generate an illustrated story for *user_prompt* and print it.

    Steps: obtain the assistant, create a thread, post the prompt, start a
    run, poll until it finishes, then print every text block of the
    assistant's messages and save every image block via
    ``save_image_from_file_id``. Any failure is printed and ends the function
    early.

    Args:
        user_prompt: The story idea sent to the assistant as the user message.

    Returns:
        None. Output is printed and images are written to disk.
    """
    print("\n--- Starting Visual Story Generation ---")
    print(f"User Prompt: \"{user_prompt}\"")
    # 1. Get the Assistant
    assistant = create_or_retrieve_assistant(
        client, ASSISTANT_NAME, ASSISTANT_INSTRUCTIONS, ASSISTANT_MODEL
    )
    if not assistant:
        return
    # 2. Create a Thread
    try:
        thread = client.beta.threads.create()
        print(f"Created new thread (ID: {thread.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating thread: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating thread: {e}")
        return
    # 3. Add User Message to Thread
    try:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_prompt,
        )
        print("User message added to thread.")
    except OpenAIError as e:
        print(f"OpenAI API Error adding message: {e}")
        return
    except Exception as e:
        print(f"Unexpected error adding message: {e}")
        return
    # 4. Run the Assistant
    try:
        run = client.beta.threads.runs.create(
            assistant_id=assistant.id,
            thread_id=thread.id,
            # Instructions could be overridden here if needed
        )
        print(f"Assistant run initiated (Run ID: {run.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating run: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating run: {e}")
        return
    # 5. Poll for Run Completion
    final_run_status = poll_run_status(client, thread.id, run.id)
    if not final_run_status or final_run_status.status != "completed":
        print("Story generation did not complete successfully.")
        return
    # 6. Retrieve and Process Messages
    print("\n--- Retrieving Story Content ---")
    try:
        # Oldest to newest, so scenes print in story order.
        messages_response = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
        scene_count = 0
        # Iterate the response itself rather than ``.data`` so the SDK fetches
        # further pages when the thread holds more messages than one page.
        for msg in messages_response:
            if msg.role != "assistant":  # Process only assistant messages
                continue
            print(f"\nProcessing Assistant Message (ID: {msg.id})")
            for content_block in msg.content:
                if content_block.type == "text":
                    scene_count += 1  # Assume text block starts a new scene description
                    print(f"\n--- Scene {scene_count} Description ---")
                    print(content_block.text.value)
                elif content_block.type == "image_file":
                    # Labelled with the most recent text block's scene number.
                    print(f"\n--- Scene {scene_count} Image ---")
                    file_id = content_block.image_file.file_id
                    print(f"Image File ID: {file_id}")
                    # Retrieve and save the actual image
                    save_image_from_file_id(client, file_id)
    except OpenAIError as e:
        print(f"OpenAI API Error retrieving messages: {e}")
    except Exception as e:
        print(f"Unexpected error retrieving messages: {e}")
    print("\n--- Visual Story Generation Finished ---")
# --- Run the Generator ---
if __name__ == "__main__":
    # Demo entry point. To try a different story, replace the active
    # assignment with one of the commented-out alternatives.
    # story_idea = "Write a sci-fi tale about an astronaut finding ancient ruins on Mars."
    # story_idea = "A lonely lighthouse keeper befriends a migrating whale."
    story_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    generate_visual_story(story_idea)