Code icon

The App Is Undergoing Quick Maintenance

We apologize for the inconvenience. Please come back later.

Menu iconMenu iconOpenAI API Bible Volume 2
OpenAI API Bible Volume 2

Chapter 1: Image Generation and Vision with OpenAI Models

Practical Exercises — Chapter 1

Exercise 1: Generate an Image from a Text Prompt Using DALL·E 3

Task:

Create an assistant with image generation capabilities and use it to generate an image of “a cozy mountain cabin at sunrise in watercolor style.”

Solution:

import openai
import time

# Exercise 1 script: create an assistant, post one prompt to a new thread,
# run the assistant, wait for the run to finish, then print image content
# found in the thread's messages.

# Create an assistant with DALL·E image generation tool
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm that
# "image_generation" is accepted before relying on this call.
assistant = openai.beta.assistants.create(
    name="Image Generator",
    instructions="You generate artistic images based on user prompts.",
    model="gpt-4o",
    tools=[{"type": "image_generation"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add user message with prompt
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Create an image of a cozy mountain cabin at sunrise in watercolor style.",
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for completion. Stop on every terminal status, not only "completed":
# otherwise a failed, cancelled or expired run keeps this loop polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Retrieve response
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Image file ID:", content.image_file.file_id)

Exercise 2: Edit an Existing Image Using Inpainting

Task:

Upload a PNG image and ask DALL·E to replace the dog in the image with a cat.

Solution:

# Exercise 2 script: upload an image, ask an assistant to edit it, wait for
# the run to finish, then print image content found in the thread's messages.

# Upload an image for editing (with transparent section or masked area).
# The context manager closes the local file handle once the upload returns.
# NOTE(review): "image_edit" is not among the purposes documented for the
# Files API (e.g. "assistants", "vision") -- confirm the intended value.
with open("dog_scene.png", "rb") as source_image:  # Image with masked-out dog area
    image_file = openai.files.create(
        file=source_image,
        purpose="image_edit",
    )

# Create the assistant
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm "image_editing".
assistant = openai.beta.assistants.create(
    name="Editor",
    instructions="You edit uploaded images based on user commands.",
    model="gpt-4o",
    tools=[{"type": "image_editing"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add edit request
# NOTE(review): message-level `file_ids` appears to belong to Assistants API
# v1; v2 uses `attachments` or image_file content parts -- confirm against
# the SDK version in use.
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Replace the dog in this image with a gray cat sitting calmly.",
    file_ids=[image_file.id],
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for run to complete. Stop on every terminal status, not only
# "completed": otherwise a failed, cancelled or expired run keeps this loop
# polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Get the edited image
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Edited image file ID:", content.image_file.file_id)

Exercise 3: Vision-Based Image Analysis with GPT-4o

Task:

Upload an image of a pie chart and ask GPT-4o to summarize the main takeaway from it.

Solution:

import base64

# Exercise 3 script: send a local pie-chart image plus a text prompt to
# GPT-4o through the chat completions endpoint and print the reply.

# Load the pie chart image and embed it as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the image is sent inline instead of through the Files API.
with open("company_expenses_piechart.png", "rb") as chart:
    encoded_chart = base64.b64encode(chart.read()).decode("ascii")

# Send image + text prompt to GPT-4o.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the key insights from this pie chart."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded_chart}"},
                },
            ],
        }
    ],
    max_tokens=300,
    temperature=0.4,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Summary:", response.choices[0].message.content)

Exercise 4: Multimodal Comparison Between Two Designs

Task:

Upload two UI design mockups and ask GPT-4o to compare them and suggest improvements.

Solution:

import base64

# Exercise 4 script: send two local UI mockups plus a text prompt to GPT-4o
# through the chat completions endpoint and print the comparison.

# Load the two UI designs and embed each as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the images are sent inline instead of through the Files API.
design_parts = []
for design_path in ("design_A.png", "design_B.png"):
    with open(design_path, "rb") as design:
        encoded_design = base64.b64encode(design.read()).decode("ascii")
    design_parts.append(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded_design}"},
        }
    )

# Send both images with prompt.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare these two app designs and suggest improvements for the second one."},
                *design_parts,
            ],
        }
    ],
    max_tokens=500,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Comparison Summary:", response.choices[0].message.content)

These exercises gave you practical experience with:

  • Image generation using DALL·E 3
  • Image editing and inpainting with natural language
  • Visual reasoning and analysis with GPT-4o
  • Multimodal input combining text and multiple images

You now have the tools to build engaging visual applications—whether you're creating art, automating content generation, analyzing data, or enhancing accessibility.

Practical Exercises — Chapter 1

Exercise 1: Generate an Image from a Text Prompt Using DALL·E 3

Task:

Create an assistant with image generation capabilities and use it to generate an image of “a cozy mountain cabin at sunrise in watercolor style.”

Solution:

import openai
import time

# Exercise 1 script: create an assistant, post one prompt to a new thread,
# run the assistant, wait for the run to finish, then print image content
# found in the thread's messages.

# Create an assistant with DALL·E image generation tool
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm that
# "image_generation" is accepted before relying on this call.
assistant = openai.beta.assistants.create(
    name="Image Generator",
    instructions="You generate artistic images based on user prompts.",
    model="gpt-4o",
    tools=[{"type": "image_generation"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add user message with prompt
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Create an image of a cozy mountain cabin at sunrise in watercolor style.",
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for completion. Stop on every terminal status, not only "completed":
# otherwise a failed, cancelled or expired run keeps this loop polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Retrieve response
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Image file ID:", content.image_file.file_id)

Exercise 2: Edit an Existing Image Using Inpainting

Task:

Upload a PNG image and ask DALL·E to replace the dog in the image with a cat.

Solution:

# Exercise 2 script: upload an image, ask an assistant to edit it, wait for
# the run to finish, then print image content found in the thread's messages.

# Upload an image for editing (with transparent section or masked area).
# The context manager closes the local file handle once the upload returns.
# NOTE(review): "image_edit" is not among the purposes documented for the
# Files API (e.g. "assistants", "vision") -- confirm the intended value.
with open("dog_scene.png", "rb") as source_image:  # Image with masked-out dog area
    image_file = openai.files.create(
        file=source_image,
        purpose="image_edit",
    )

# Create the assistant
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm "image_editing".
assistant = openai.beta.assistants.create(
    name="Editor",
    instructions="You edit uploaded images based on user commands.",
    model="gpt-4o",
    tools=[{"type": "image_editing"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add edit request
# NOTE(review): message-level `file_ids` appears to belong to Assistants API
# v1; v2 uses `attachments` or image_file content parts -- confirm against
# the SDK version in use.
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Replace the dog in this image with a gray cat sitting calmly.",
    file_ids=[image_file.id],
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for run to complete. Stop on every terminal status, not only
# "completed": otherwise a failed, cancelled or expired run keeps this loop
# polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Get the edited image
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Edited image file ID:", content.image_file.file_id)

Exercise 3: Vision-Based Image Analysis with GPT-4o

Task:

Upload an image of a pie chart and ask GPT-4o to summarize the main takeaway from it.

Solution:

import base64

# Exercise 3 script: send a local pie-chart image plus a text prompt to
# GPT-4o through the chat completions endpoint and print the reply.

# Load the pie chart image and embed it as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the image is sent inline instead of through the Files API.
with open("company_expenses_piechart.png", "rb") as chart:
    encoded_chart = base64.b64encode(chart.read()).decode("ascii")

# Send image + text prompt to GPT-4o.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the key insights from this pie chart."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded_chart}"},
                },
            ],
        }
    ],
    max_tokens=300,
    temperature=0.4,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Summary:", response.choices[0].message.content)

Exercise 4: Multimodal Comparison Between Two Designs

Task:

Upload two UI design mockups and ask GPT-4o to compare them and suggest improvements.

Solution:

import base64

# Exercise 4 script: send two local UI mockups plus a text prompt to GPT-4o
# through the chat completions endpoint and print the comparison.

# Load the two UI designs and embed each as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the images are sent inline instead of through the Files API.
design_parts = []
for design_path in ("design_A.png", "design_B.png"):
    with open(design_path, "rb") as design:
        encoded_design = base64.b64encode(design.read()).decode("ascii")
    design_parts.append(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded_design}"},
        }
    )

# Send both images with prompt.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare these two app designs and suggest improvements for the second one."},
                *design_parts,
            ],
        }
    ],
    max_tokens=500,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Comparison Summary:", response.choices[0].message.content)

These exercises gave you practical experience with:

  • Image generation using DALL·E 3
  • Image editing and inpainting with natural language
  • Visual reasoning and analysis with GPT-4o
  • Multimodal input combining text and multiple images

You now have the tools to build engaging visual applications—whether you're creating art, automating content generation, analyzing data, or enhancing accessibility.

Practical Exercises — Chapter 1

Exercise 1: Generate an Image from a Text Prompt Using DALL·E 3

Task:

Create an assistant with image generation capabilities and use it to generate an image of “a cozy mountain cabin at sunrise in watercolor style.”

Solution:

import openai
import time

# Exercise 1 script: create an assistant, post one prompt to a new thread,
# run the assistant, wait for the run to finish, then print image content
# found in the thread's messages.

# Create an assistant with DALL·E image generation tool
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm that
# "image_generation" is accepted before relying on this call.
assistant = openai.beta.assistants.create(
    name="Image Generator",
    instructions="You generate artistic images based on user prompts.",
    model="gpt-4o",
    tools=[{"type": "image_generation"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add user message with prompt
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Create an image of a cozy mountain cabin at sunrise in watercolor style.",
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for completion. Stop on every terminal status, not only "completed":
# otherwise a failed, cancelled or expired run keeps this loop polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Retrieve response
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Image file ID:", content.image_file.file_id)

Exercise 2: Edit an Existing Image Using Inpainting

Task:

Upload a PNG image and ask DALL·E to replace the dog in the image with a cat.

Solution:

# Exercise 2 script: upload an image, ask an assistant to edit it, wait for
# the run to finish, then print image content found in the thread's messages.

# Upload an image for editing (with transparent section or masked area).
# The context manager closes the local file handle once the upload returns.
# NOTE(review): "image_edit" is not among the purposes documented for the
# Files API (e.g. "assistants", "vision") -- confirm the intended value.
with open("dog_scene.png", "rb") as source_image:  # Image with masked-out dog area
    image_file = openai.files.create(
        file=source_image,
        purpose="image_edit",
    )

# Create the assistant
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm "image_editing".
assistant = openai.beta.assistants.create(
    name="Editor",
    instructions="You edit uploaded images based on user commands.",
    model="gpt-4o",
    tools=[{"type": "image_editing"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add edit request
# NOTE(review): message-level `file_ids` appears to belong to Assistants API
# v1; v2 uses `attachments` or image_file content parts -- confirm against
# the SDK version in use.
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Replace the dog in this image with a gray cat sitting calmly.",
    file_ids=[image_file.id],
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for run to complete. Stop on every terminal status, not only
# "completed": otherwise a failed, cancelled or expired run keeps this loop
# polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Get the edited image
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Edited image file ID:", content.image_file.file_id)

Exercise 3: Vision-Based Image Analysis with GPT-4o

Task:

Upload an image of a pie chart and ask GPT-4o to summarize the main takeaway from it.

Solution:

import base64

# Exercise 3 script: send a local pie-chart image plus a text prompt to
# GPT-4o through the chat completions endpoint and print the reply.

# Load the pie chart image and embed it as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the image is sent inline instead of through the Files API.
with open("company_expenses_piechart.png", "rb") as chart:
    encoded_chart = base64.b64encode(chart.read()).decode("ascii")

# Send image + text prompt to GPT-4o.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the key insights from this pie chart."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded_chart}"},
                },
            ],
        }
    ],
    max_tokens=300,
    temperature=0.4,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Summary:", response.choices[0].message.content)

Exercise 4: Multimodal Comparison Between Two Designs

Task:

Upload two UI design mockups and ask GPT-4o to compare them and suggest improvements.

Solution:

import base64

# Exercise 4 script: send two local UI mockups plus a text prompt to GPT-4o
# through the chat completions endpoint and print the comparison.

# Load the two UI designs and embed each as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the images are sent inline instead of through the Files API.
design_parts = []
for design_path in ("design_A.png", "design_B.png"):
    with open(design_path, "rb") as design:
        encoded_design = base64.b64encode(design.read()).decode("ascii")
    design_parts.append(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded_design}"},
        }
    )

# Send both images with prompt.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare these two app designs and suggest improvements for the second one."},
                *design_parts,
            ],
        }
    ],
    max_tokens=500,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Comparison Summary:", response.choices[0].message.content)

These exercises gave you practical experience with:

  • Image generation using DALL·E 3
  • Image editing and inpainting with natural language
  • Visual reasoning and analysis with GPT-4o
  • Multimodal input combining text and multiple images

You now have the tools to build engaging visual applications—whether you're creating art, automating content generation, analyzing data, or enhancing accessibility.

Practical Exercises — Chapter 1

Exercise 1: Generate an Image from a Text Prompt Using DALL·E 3

Task:

Create an assistant with image generation capabilities and use it to generate an image of “a cozy mountain cabin at sunrise in watercolor style.”

Solution:

import openai
import time

# Exercise 1 script: create an assistant, post one prompt to a new thread,
# run the assistant, wait for the run to finish, then print image content
# found in the thread's messages.

# Create an assistant with DALL·E image generation tool
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm that
# "image_generation" is accepted before relying on this call.
assistant = openai.beta.assistants.create(
    name="Image Generator",
    instructions="You generate artistic images based on user prompts.",
    model="gpt-4o",
    tools=[{"type": "image_generation"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add user message with prompt
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Create an image of a cozy mountain cabin at sunrise in watercolor style.",
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for completion. Stop on every terminal status, not only "completed":
# otherwise a failed, cancelled or expired run keeps this loop polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Retrieve response
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Image file ID:", content.image_file.file_id)

Exercise 2: Edit an Existing Image Using Inpainting

Task:

Upload a PNG image and ask DALL·E to replace the dog in the image with a cat.

Solution:

# Exercise 2 script: upload an image, ask an assistant to edit it, wait for
# the run to finish, then print image content found in the thread's messages.

# Upload an image for editing (with transparent section or masked area).
# The context manager closes the local file handle once the upload returns.
# NOTE(review): "image_edit" is not among the purposes documented for the
# Files API (e.g. "assistants", "vision") -- confirm the intended value.
with open("dog_scene.png", "rb") as source_image:  # Image with masked-out dog area
    image_file = openai.files.create(
        file=source_image,
        purpose="image_edit",
    )

# Create the assistant
# NOTE(review): the Assistants API reference lists "code_interpreter",
# "file_search" and "function" as tool types -- confirm "image_editing".
assistant = openai.beta.assistants.create(
    name="Editor",
    instructions="You edit uploaded images based on user commands.",
    model="gpt-4o",
    tools=[{"type": "image_editing"}],
)

# Create a thread
thread = openai.beta.threads.create()

# Add edit request
# NOTE(review): message-level `file_ids` appears to belong to Assistants API
# v1; v2 uses `attachments` or image_file content parts -- confirm against
# the SDK version in use.
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Replace the dog in this image with a gray cat sitting calmly.",
    file_ids=[image_file.id],
)

# Run the assistant
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

# Wait for run to complete. Stop on every terminal status, not only
# "completed": otherwise a failed, cancelled or expired run keeps this loop
# polling forever.
terminal_statuses = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in terminal_statuses:
        break
    time.sleep(1)

# Surface unsuccessful runs instead of silently printing nothing.
if run_status.status != "completed":
    raise RuntimeError(f"Run ended with status {run_status.status!r}")

# Get the edited image
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    for content in msg.content:
        if content.type == "image_file":
            # An image_file block references the image by file ID; it has no
            # URL attribute. The bytes can be fetched via the Files API.
            print("Edited image file ID:", content.image_file.file_id)

Exercise 3: Vision-Based Image Analysis with GPT-4o

Task:

Upload an image of a pie chart and ask GPT-4o to summarize the main takeaway from it.

Solution:

import base64

# Exercise 3 script: send a local pie-chart image plus a text prompt to
# GPT-4o through the chat completions endpoint and print the reply.

# Load the pie chart image and embed it as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the image is sent inline instead of through the Files API.
with open("company_expenses_piechart.png", "rb") as chart:
    encoded_chart = base64.b64encode(chart.read()).decode("ascii")

# Send image + text prompt to GPT-4o.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the key insights from this pie chart."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded_chart}"},
                },
            ],
        }
    ],
    max_tokens=300,
    temperature=0.4,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Summary:", response.choices[0].message.content)

Exercise 4: Multimodal Comparison Between Two Designs

Task:

Upload two UI design mockups and ask GPT-4o to compare them and suggest improvements.

Solution:

import base64

# Exercise 4 script: send two local UI mockups plus a text prompt to GPT-4o
# through the chat completions endpoint and print the comparison.

# Load the two UI designs and embed each as a base64 data URL. An image_url
# entry must be an http(s) URL or a data URL; an uploaded file ID is neither,
# so the images are sent inline instead of through the Files API.
design_parts = []
for design_path in ("design_A.png", "design_B.png"):
    with open(design_path, "rb") as design:
        encoded_design = base64.b64encode(design.read()).decode("ascii")
    design_parts.append(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded_design}"},
        }
    )

# Send both images with prompt.
# Uses the 1.x SDK call path (openai.chat.completions), matching the
# openai.beta.* calls used elsewhere in this file; openai.ChatCompletion is
# the pre-1.0 interface.
response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare these two app designs and suggest improvements for the second one."},
                *design_parts,
            ],
        }
    ],
    max_tokens=500,
)

# 1.x responses are typed objects, so use attribute access, not dict keys.
print("Comparison Summary:", response.choices[0].message.content)

These exercises gave you practical experience with:

  • Image generation using DALL·E 3
  • Image editing and inpainting with natural language
  • Visual reasoning and analysis with GPT-4o
  • Multimodal input combining text and multiple images

You now have the tools to build engaging visual applications—whether you're creating art, automating content generation, analyzing data, or enhancing accessibility.