OpenAI API Bible Volume 1

Chapter 7: Memory and Multi-Turn Conversations

Practical Exercises – Chapter 7

Exercise 1: Implementing Short-Term Memory in a Conversation

Task:

Create a simple assistant that remembers previous user messages and responds appropriately using short-term memory. Keep the conversation in a local list and send the full history on each turn.

Solution:

import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# The conversation list is the short-term memory: every turn is appended
# here, and the full list is resent with each request.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."}
]

def chat(user_input):
    conversation.append({"role": "user", "content": user_input})

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        max_tokens=150,
        temperature=0.5
    )

    reply = response.choices[0].message.content
    conversation.append({"role": "assistant", "content": reply})
    return reply

# Try a multi-turn conversation
print("User: What’s your favorite programming language?")
print("Assistant:", chat("What’s your favorite programming language?"))

print("\nUser: Can you show me a simple example in it?")
print("Assistant:", chat("Can you show me a simple example in it?"))

Exercise 2: Saving and Retrieving Long-Term Memory

Task:

Simulate persistent memory by storing messages in a JSON file and retrieving them at the start of a new session.

Solution:

import json

MEMORY_FILE = "user_memory.json"

def save_message(message):
    """Append one message to the JSON memory file, creating it if needed."""
    try:
        with open(MEMORY_FILE, "r") as f:
            memory = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or corrupt file: start with empty memory.
        memory = []

    memory.append(message)

    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=2)

def load_memory():
    """Return all saved messages, or an empty list if none exist."""
    try:
        with open(MEMORY_FILE, "r") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []

# Store a sample message
save_message({"role": "user", "content": "How does recursion work?"})
save_message({"role": "assistant", "content": "Recursion is when a function calls itself to solve a problem."})

# Load and review memory
print("Long-Term Memory:", load_memory())

Exercise 3: Summarize Previous Messages to Save Tokens

Task:

Use the Chat Completions API to summarize old messages before injecting them into a new conversation to stay under token limits.

Solution:

# Reuses the client from Exercise 1.
def summarize_conversation(history):
    """Condense a message list into a single system message."""
    summary_prompt = [
        {"role": "system", "content": "Summarize this conversation briefly:"},
        {"role": "user", "content": "\n".join(f'{m["role"]}: {m["content"]}' for m in history)}
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=summary_prompt,
        max_tokens=100,
        temperature=0.3
    )

    return {"role": "system", "content": "Summary: " + response.choices[0].message.content}

# Example history
history = [
    {"role": "user", "content": "Tell me about machine learning."},
    {"role": "assistant", "content": "Machine learning is a subset of AI..."},
    {"role": "user", "content": "What is supervised learning?"}
]

# Get a summary
print("Summary:", summarize_conversation(history))

Exercise 4: Build an Assistant Using the Assistants API

Task:

Create an assistant with OpenAI’s Assistants API that maintains a conversation thread and responds with context.

Solution:

import os
import time

from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Step 1: Create the assistant
assistant = client.beta.assistants.create(
    name="Memory Coach",
    instructions="You are a memory coach that helps users retain information over time.",
    model="gpt-4o"
)

# Step 2: Create a thread (the conversation lives server-side)
thread = client.beta.threads.create()

# Step 3: Add a message
client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Help me remember the difference between RAM and ROM."
)

# Step 4: Run the assistant
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id
)

# Step 5: Poll until the run reaches a terminal state
while True:
    run_status = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in ("completed", "failed", "cancelled", "expired"):
        break
    time.sleep(1)

# Step 6: Print the assistant's latest reply (messages come back newest first)
messages = client.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    if msg.role == "assistant":
        print("Assistant:", msg.content[0].text.value)
        break

Exercise 5: Compare Chat Completions vs. the Assistants API

Task:

Run the same prompt through both APIs and compare how each handles context.

Solution:

Chat Completions Version:

chat_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain how HTTP works."}
    ],
    max_tokens=150
)

print("Chat Completion Response:", chat_response.choices[0].message.content)

Assistants Version:

# The client, assistant, and thread from Exercise 4 are reused here; the
# thread already holds the earlier exchange server-side.
client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Explain how HTTP works."
)

run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id
)

while True:
    run_status = client.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status in ("completed", "failed", "cancelled", "expired"):
        break
    time.sleep(1)

messages = client.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    if msg.role == "assistant":
        print("Assistant Response (Assistants API):", msg.content[0].text.value)
        break

The practical difference is visible here: the Chat Completions version must resend any prior context itself, while the Assistants thread retains it between runs.

These exercises reinforced how to:

  • Manage short-term and long-term memory
  • Handle context limits
  • Apply thread and token management (see the token-counting sketch below)
  • Choose between the Chat Completions and Assistants APIs based on your goals
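
For the token-management point, a small estimation helper is often useful before sending a request. The sketch below uses tiktoken, OpenAI's tokenizer library; the per-message overhead constant is an approximation, not an exact accounting:

import tiktoken

def count_tokens(messages, model="gpt-4o"):
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        enc = tiktoken.get_encoding("cl100k_base")  # fallback for older tiktoken
    # Content tokens plus a rough per-message overhead for role/formatting.
    return sum(len(enc.encode(m["content"])) + 4 for m in messages)

print("Approx. prompt tokens:", count_tokens(conversation))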

By mastering these workflows, you’re well-equipped to build smarter, more context-aware assistants that scale with your users and their needs.
