Chapter 7: Memory and Multi-Turn Conversations
Practical Exercises
Exercise 1: Implementing Short-Term Memory in a Conversation
Task:
Create a simple assistant that remembers previous user messages and responds appropriately using short-term memory. Keep the conversation in a local list and send the full history on each turn. (The solutions in this chapter assume the openai Python SDK v1.x, where chat calls are made with openai.chat.completions.create.)
Solution:
import os

import openai
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

conversation = [
    {"role": "system", "content": "You are a helpful assistant."}
]

def chat(user_input):
    # Append the new user turn, then send the entire history with the request
    conversation.append({"role": "user", "content": user_input})
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        max_tokens=150,
        temperature=0.5
    )
    # Store the reply so the next turn can build on it
    reply = response.choices[0].message.content
    conversation.append({"role": "assistant", "content": reply})
    return reply
# Try a multi-turn conversation
print("User: What’s your favorite programming language?")
print("Assistant:", chat("What’s your favorite programming language?"))
print("\nUser: Can you show me a simple example in it?")
print("Assistant:", chat("Can you show me a simple example in it?"))
Exercise 2: Saving and Retrieving Long-Term Memory
Task:
Simulate persistent memory by storing messages in a JSON file and retrieving them at the start of a new session.
Solution:
import json

MEMORY_FILE = "user_memory.json"

def save_message(message):
    # Load existing memory (or start fresh), append the message, and write back
    try:
        with open(MEMORY_FILE, "r") as f:
            memory = json.load(f)
    except FileNotFoundError:
        memory = []
    memory.append(message)
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=2)

def load_memory():
    # Return everything remembered so far, or an empty list on first run
    try:
        with open(MEMORY_FILE, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return []
# Store a sample message
save_message({"role": "user", "content": "How does recursion work?"})
save_message({"role": "assistant", "content": "Recursion is when a function calls itself to solve a problem."})
# Load and review memory
print("Long-Term Memory:", load_memory())
Exercise 3: Summarize Previous Messages to Save Tokens
Task:
Use the Chat Completions API to summarize old messages before injecting them into a new conversation to stay under token limits.
Solution:
def summarize_conversation(history):
    # Flatten the history into plain text and ask the model for a brief summary
    summary_prompt = [
        {"role": "system", "content": "Summarize this conversation briefly:"},
        {"role": "user", "content": "\n".join(f'{m["role"]}: {m["content"]}' for m in history)}
    ]
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=summary_prompt,
        max_tokens=100,
        temperature=0.3
    )
    # Return the summary as a system message, ready to inject into a new conversation
    return {"role": "system", "content": "Summary: " + response.choices[0].message.content}
# Example history
history = [
    {"role": "user", "content": "Tell me about machine learning."},
    {"role": "assistant", "content": "Machine learning is a subset of AI..."},
    {"role": "user", "content": "What is supervised learning?"}
]
# Get a summary
print("Summary:", summarize_conversation(history))
Exercise 4: Build an Assistant Using the Assistants API
Task:
Create an assistant using OpenAI’s Assistants API that remembers a thread and responds with context.
Solution:
import time

import openai

# Step 1: Create the assistant
assistant = openai.beta.assistants.create(
    name="Memory Coach",
    instructions="You are a memory coach that helps users retain information over time.",
    model="gpt-4o"
)

# Step 2: Create a thread (the server-side conversation container)
thread = openai.beta.threads.create()

# Step 3: Add a message to the thread
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Help me remember the difference between RAM and ROM."
)

# Step 4: Run the assistant on the thread
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id
)

# Step 5: Poll until the run reaches a terminal state
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status == "completed":
        break
    if run_status.status in ("failed", "cancelled", "expired"):
        raise RuntimeError(f"Run ended with status: {run_status.status}")
    time.sleep(1)

# Step 6: Print the assistant's latest response (messages are returned newest first)
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    if msg.role == "assistant":
        print("Assistant:", msg.content[0].text.value)
        break
Exercise 5: Compare Chat Completions vs Assistants
Task:
Run the same prompt using both APIs and compare the handling of context.
Solution:
Chat Completions Version:
chat_response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain how HTTP works."}
    ],
    max_tokens=150
)
print("Chat Completion Response:", chat_response.choices[0].message.content)
Assistants Version:
# Thread and assistant are assumed to have been created already (see Exercise 4)
openai.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Explain how HTTP works."
)
run = openai.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id
)
while True:
    run_status = openai.beta.threads.runs.retrieve(run.id, thread_id=thread.id)
    if run_status.status == "completed":
        break
    if run_status.status in ("failed", "cancelled", "expired"):
        raise RuntimeError(f"Run ended with status: {run_status.status}")
    time.sleep(1)

# Messages come back newest first, so the first assistant message is the reply to this run
messages = openai.beta.threads.messages.list(thread_id=thread.id)
for msg in messages.data:
    if msg.role == "assistant":
        print("Assistant Response (Assistants API):", msg.content[0].text.value)
        break
These exercises reinforced how to:
- Manage short-term and long-term memory
- Handle context limits
- Apply thread and token management
- Choose between the Chat Completions and Assistants APIs based on your goals
By mastering these workflows, you’re well-equipped to build smarter, more context-aware assistants that scale with your users and their needs.