Project: Visual Story Generator: GPT-4o + DALL·E image flow based on prompt narrative

3. Setup and Complete Code

Rather than presenting the code in fragmented pieces, below is a single, complete Python script that includes everything you need to build this visual story generator.

The script includes error handling for API interactions, polling-based management of the asynchronous run lifecycle, and retrieval and storage of the generated image files. It demonstrates practical patterns for working with the OpenAI Assistants API, handling both text and image generation, and managing the flow of data between the components of the system. After the code, we'll break down each major section to understand how it works and why certain implementation choices were made.

Prerequisites:

1. Install the required libraries: pip install openai python-dotenv
2. Create a .env file in the same directory containing: OPENAI_API_KEY=your-api-key-here (see the optional check below)
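
If you want to confirm the environment is set up before running the full script, a minimal check like the following can help. This is an optional sketch; it only verifies that python-dotenv can find your key and does not call the API:

import os
from dotenv import load_dotenv

load_dotenv()  # Reads the .env file from the current working directory
if os.getenv("OPENAI_API_KEY"):
    print("OPENAI_API_KEY found - ready to run the generator.")
else:
    print("OPENAI_API_KEY missing - check your .env file.")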

# --- Visual Story Generator ---
# Uses OpenAI Assistants API (GPT-4o + DALL-E 3)

import os
import time
import datetime
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv

# --- Configuration ---
load_dotenv()
ASSISTANT_NAME = "Visual Storyteller"
ASSISTANT_MODEL = "gpt-4o"
ASSISTANT_INSTRUCTIONS = (
    "You are a visual storyteller. When a user gives you a story idea, "
    "break it into 4–6 scenes. For each scene, first write a paragraph vividly describing the scene. "
    "After describing the scene, explicitly call the image generation tool to create an image that matches that specific scene description. "
    "Ensure each scene description is followed by its corresponding image generation call."
)
POLLING_INTERVAL_S = 2 # How often to check run status
RUN_TIMEOUT_S = 300 # Max time to wait for a run to complete

# --- Initialize OpenAI Client ---
try:
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
    client = OpenAI(api_key=api_key)
    print("OpenAI client initialized.")
except ValueError as e:
    print(f"Configuration Error: {e}")
    exit()
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    exit()

# --- Helper Function: Create or Retrieve Assistant ---
def create_or_retrieve_assistant(client, name, instructions, model):
    """Creates a new assistant or retrieves an existing one by name."""
    try:
        # Check if assistant with this name already exists (simple check)
        assistants = client.beta.assistants.list(limit=100)
        for assistant in assistants.data:
            if assistant.name == name:
                print(f"Retrieved existing assistant '{name}' (ID: {assistant.id})")
                return assistant

        # If not found, create a new one
        print(f"Creating new assistant '{name}'...")
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            tools=[{"type": "image_generation"}] # Enable DALL-E 3 tool
        )
        print(f"Created new assistant '{name}' (ID: {assistant.id})")
        return assistant
    except OpenAIError as e:
        print(f"OpenAI API Error creating/retrieving assistant: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error creating/retrieving assistant: {e}")
        return None

# --- Helper Function: Poll Run Status ---
def poll_run_status(client, thread_id, run_id, timeout=RUN_TIMEOUT_S):
    """Polls the status of an assistant run until completion or timeout."""
    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
            print(f"Run ID: {run_id}, Status: {run_status.status}")

            if run_status.status == "completed":
                print("Run completed successfully.")
                return run_status
            elif run_status.status in ["queued", "in_progress", "requires_action"]:
                time.sleep(POLLING_INTERVAL_S)
            else: # failed, cancelled, expired
                print(f"Run terminated with status: {run_status.status}")
                return run_status # Return the final status object
        except OpenAIError as e:
            print(f"OpenAI API Error polling run status: {e}")
            time.sleep(POLLING_INTERVAL_S) # Wait before retrying
        except Exception as e:
            print(f"Unexpected error polling run status: {e}")
            return None # Exit polling on unexpected error
    print("Run polling timed out.")
    return None

# --- Helper Function: Retrieve and Save Image File ---
def save_image_from_file_id(client, file_id, output_dir="story_images"):
    """Retrieves image content using file_id and saves it."""
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Retrieve file content (binary)
        print(f"Retrieving content for file ID: {file_id}...")
        response = client.files.content(file_id)  # Binary file content (openai>=1.x)
        image_data = response.read() # Read binary content

        # Construct filename (ensure uniqueness if needed)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = os.path.join(output_dir, f"scene_{timestamp}_{file_id}.png")

        # Save the image data to a file
        with open(output_filename, "wb") as f:
            f.write(image_data)
        print(f"Image saved successfully as '{output_filename}'")
        return output_filename

    except OpenAIError as e:
        print(f"OpenAI API Error retrieving file content for {file_id}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error retrieving/saving file {file_id}: {e}")
        return None

# --- Main Execution Logic ---
def generate_visual_story(user_prompt):
    """Generates a visual story based on the user prompt."""
    print("\n--- Starting Visual Story Generation ---")
    print(f"User Prompt: \"{user_prompt}\"")

    # 1. Get the Assistant
    assistant = create_or_retrieve_assistant(
        client, ASSISTANT_NAME, ASSISTANT_INSTRUCTIONS, ASSISTANT_MODEL
    )
    if not assistant:
        return

    # 2. Create a Thread
    try:
        thread = client.beta.threads.create()
        print(f"Created new thread (ID: {thread.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating thread: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating thread: {e}")
        return

    # 3. Add User Message to Thread
    try:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=user_prompt
        )
        print("User message added to thread.")
    except OpenAIError as e:
        print(f"OpenAI API Error adding message: {e}")
        return
    except Exception as e:
        print(f"Unexpected error adding message: {e}")
        return

    # 4. Run the Assistant
    try:
        run = client.beta.threads.runs.create(
            assistant_id=assistant.id,
            thread_id=thread.id
            # Instructions could be overridden here if needed
        )
        print(f"Assistant run initiated (Run ID: {run.id})")
    except OpenAIError as e:
        print(f"OpenAI API Error creating run: {e}")
        return
    except Exception as e:
        print(f"Unexpected error creating run: {e}")
        return

    # 5. Poll for Run Completion
    final_run_status = poll_run_status(client, thread.id, run.id)

    if not final_run_status or final_run_status.status != "completed":
        print("Story generation did not complete successfully.")
        return

    # 6. Retrieve and Process Messages
    print("\n--- Retrieving Story Content ---")
    try:
        messages_response = client.beta.threads.messages.list(thread_id=thread.id, order="asc") # Get messages oldest to newest
        messages_data = messages_response.data

        scene_count = 0
        for msg in messages_data:
            if msg.role == "assistant": # Process only assistant messages
                print(f"\nProcessing Assistant Message (ID: {msg.id})")
                for content_block in msg.content:
                    if content_block.type == "text":
                        scene_count += 1 # Assume text block starts a new scene description
                        print(f"\n--- Scene {scene_count} Description ---")
                        print(content_block.text.value)
                    elif content_block.type == "image_file":
                        print(f"\n--- Scene {scene_count} Image ---")
                        file_id = content_block.image_file.file_id
                        print(f"Image File ID: {file_id}")
                        # Retrieve and save the actual image
                        save_image_from_file_id(client, file_id)

    except OpenAIError as e:
        print(f"OpenAI API Error retrieving messages: {e}")
    except Exception as e:
        print(f"Unexpected error retrieving messages: {e}")

    print("\n--- Visual Story Generation Finished ---")


# --- Run the Generator ---
if __name__ == "__main__":
    story_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    # story_idea = "Write a sci-fi tale about an astronaut finding ancient ruins on Mars."
    # story_idea = "A lonely lighthouse keeper befriends a migrating whale."

    generate_visual_story(story_idea)
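
If you prefer to supply the story idea at run time instead of editing the script, the __main__ block can be adapted to read it from the command line. Below is a minimal sketch using only the standard library's sys module; the argument handling is deliberately simple, and the fallback prompt is the same default used above:

import sys

if __name__ == "__main__":
    # Use the first command-line argument as the story idea, or fall back to a default.
    default_idea = "Tell a short story about a curious cat who discovers a hidden garden behind its house."
    story_idea = sys.argv[1] if len(sys.argv) > 1 else default_idea
    generate_visual_story(story_idea)

You would then invoke it as, for example, python your_script.py "A lonely lighthouse keeper befriends a migrating whale." (the filename here is just a placeholder).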
