Chapter 3: Embeddings and Semantic Search
Practical Exercises — Chapter 3
Exercise 1: Generate Embeddings for a List of Texts
Task:
Create embeddings for a list of product descriptions using OpenAI’s text-embedding-3-small
model and print the dimension of each vector.
Solution:
import openai
import os
from dotenv import load_dotenv
# Load variables from a local .env file, then hand the key to the OpenAI SDK.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Product descriptions to embed.
products = [
    "Eco-friendly water bottle made from recycled plastic",
    "Wireless noise-cancelling headphones",
    "Ergonomic office chair with lumbar support",
]

# Embed each description and report the size of the returned vector.
embeddings = []
for product in products:
    # openai>=1.0 removed `openai.Embedding.create`; the module-level
    # `openai.embeddings.create` is its replacement.
    response = openai.embeddings.create(input=product, model="text-embedding-3-small")
    # The v1 SDK returns a typed response object, so the vector is read
    # through attributes rather than dictionary keys.
    vector = response.data[0].embedding
    embeddings.append(vector)
    print(f"🔢 Vector length: {len(vector)}")
Exercise 2: Calculate Similarity Between Two Texts
Task:
Compare the semantic similarity between two user reviews using cosine similarity.
Solution:
from numpy import dot
from numpy.linalg import norm
# Two reviews that say roughly the same thing in different words.
review_1 = "The delivery was super fast and the packaging was great!"
review_2 = "Shipping was quick and everything arrived safely."


def get_embedding(text):
    """Return the text-embedding-3-small embedding vector for ``text``.

    Calls the OpenAI embeddings endpoint through the module-level client of
    the ``openai`` package (version 1.0 or newer, where the former
    ``openai.Embedding.create`` no longer exists). The package must already be
    imported and configured with an API key.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only) input, as returned by the API.
    """
    response = openai.embeddings.create(input=text, model="text-embedding-3-small")
    # v1 responses are typed objects: use attribute access, not dict keys.
    return response.data[0].embedding


vec1 = get_embedding(review_1)
vec2 = get_embedding(review_2)

# Cosine similarity: dot product divided by the product of the vector norms.
similarity = dot(vec1, vec2) / (norm(vec1) * norm(vec2))
print("🔗 Semantic Similarity:", round(similarity, 3))
Exercise 3: Create and Query a FAISS Index
Task:
Index a set of support articles and retrieve the two most relevant ones for a query.
Solution:
import faiss
import numpy as np
# Knowledge-base articles that will be searchable.
articles = [
    "Resetting your password from the login page",
    "Changing your profile photo and bio",
    "Canceling your subscription and managing refunds",
]

# Generate embeddings: one float32 row per article, L2-normalized in place
vectors = list(map(get_embedding, articles))
matrix = np.array(vectors).astype("float32")
faiss.normalize_L2(matrix)

# Create FAISS index (inner-product index sized to the embedding width)
index = faiss.IndexFlatIP(matrix.shape[1])
index.add(matrix)

# Query: embedded and normalized the same way as the articles
query = "How do I cancel my membership?"
query_vec = np.array([get_embedding(query)]).astype("float32")
faiss.normalize_L2(query_vec)

# Search for the two nearest articles; row 0 belongs to the single query
similarities, idxs = index.search(query_vec, k=2)
for match in (articles[i] for i in idxs[0]):
    print("📄 Match:", match)
Exercise 4: Build a Simple Semantic Search App with Pinecone
Task:
Use Pinecone to store embeddings and return the best-matching document for a user question.
Solution:
import pinecone
# pinecone>=3.0 removed the module-level `pinecone.init(...)`; all calls now go
# through a client object created with the API key.
pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create or connect to index
index_name = "quick-search"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the vectors being upserted
        metric="cosine",
        # Pod-based index in the environment named by PINECONE_ENV.
        # NOTE(review): serverless projects need
        # pinecone.ServerlessSpec(cloud=..., region=...) here instead —
        # confirm against your Pinecone project.
        spec=pinecone.PodSpec(environment=os.getenv("PINECONE_ENV")),
    )
index = pc.Index(index_name)

# Documents to store, keyed by the vector ID used in the index.
docs = {
    "a1": "How to export data from your dashboard",
    "a2": "Steps to upgrade your subscription plan",
    "a3": "Changing your registered email address",
}

# Insert documents as (id, embedding, metadata) tuples; the original text is
# kept in the metadata so it can be shown alongside a match.
to_upsert = [
    (doc_id, get_embedding(text), {"text": text})
    for doc_id, text in docs.items()
]
index.upsert(vectors=to_upsert)

# Search
user_query = "Can I switch to a higher plan?"
query_vec = get_embedding(user_query)
results = index.query(vector=query_vec, top_k=1, include_metadata=True)

# Guard against an empty result list so a miss does not raise IndexError.
matches = results["matches"]
if matches:
    print("🔍 Top match:", matches[0]["metadata"]["text"])
else:
    print("🔍 No match found")
Exercise 5: Recommend Similar Blog Posts
Task:
Use embeddings to recommend blog posts that are semantically related to a given one.
Solution:
# Candidate posts to recommend from.
posts = [
    "10 Python tricks you didn’t know",
    "Deploying machine learning models with FastAPI",
    "Understanding transformer models in NLP",
    "Intro to JavaScript for data scientists",
]

# Embed posts
post_embeddings = list(map(get_embedding, posts))

# Define base post
base = "How transformers work in deep learning"
base_vec = get_embedding(base)

# Compute similarity: cosine between the base post and every candidate
scores = [
    dot(base_vec, vec) / (norm(base_vec) * norm(vec))
    for vec in post_embeddings
]

# Positions of the two highest-scoring posts, best first
top_indices = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:2]

print("📚 Recommended Posts:")
for title in (posts[i] for i in top_indices):
    print("-", title)
Summary of What You Practiced
- Creating OpenAI embeddings from raw text
- Comparing vectors with cosine similarity
- Building a FAISS index for fast local vector search
- Using Pinecone to persist and query vectors in the cloud
- Making real-world recommendations based on semantic similarity
These exercises have equipped you with the core workflows that power intelligent assistants, chatbots, recommendation systems, and semantic search tools at scale.
Practical Exercises — Chapter 3
Exercise 1: Generate Embeddings for a List of Texts
Task:
Create embeddings for a list of product descriptions using OpenAI’s text-embedding-3-small
model and print the dimension of each vector.
Solution:
import openai
import os
from dotenv import load_dotenv
# Load variables from a local .env file, then hand the key to the OpenAI SDK.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Product descriptions to embed.
products = [
    "Eco-friendly water bottle made from recycled plastic",
    "Wireless noise-cancelling headphones",
    "Ergonomic office chair with lumbar support",
]

# Embed each description and report the size of the returned vector.
embeddings = []
for product in products:
    # openai>=1.0 removed `openai.Embedding.create`; the module-level
    # `openai.embeddings.create` is its replacement.
    response = openai.embeddings.create(input=product, model="text-embedding-3-small")
    # The v1 SDK returns a typed response object, so the vector is read
    # through attributes rather than dictionary keys.
    vector = response.data[0].embedding
    embeddings.append(vector)
    print(f"🔢 Vector length: {len(vector)}")
Exercise 2: Calculate Similarity Between Two Texts
Task:
Compare the semantic similarity between two user reviews using cosine similarity.
Solution:
from numpy import dot
from numpy.linalg import norm
# Two reviews that say roughly the same thing in different words.
review_1 = "The delivery was super fast and the packaging was great!"
review_2 = "Shipping was quick and everything arrived safely."


def get_embedding(text):
    """Return the text-embedding-3-small embedding vector for ``text``.

    Calls the OpenAI embeddings endpoint through the module-level client of
    the ``openai`` package (version 1.0 or newer, where the former
    ``openai.Embedding.create`` no longer exists). The package must already be
    imported and configured with an API key.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only) input, as returned by the API.
    """
    response = openai.embeddings.create(input=text, model="text-embedding-3-small")
    # v1 responses are typed objects: use attribute access, not dict keys.
    return response.data[0].embedding


vec1 = get_embedding(review_1)
vec2 = get_embedding(review_2)

# Cosine similarity: dot product divided by the product of the vector norms.
similarity = dot(vec1, vec2) / (norm(vec1) * norm(vec2))
print("🔗 Semantic Similarity:", round(similarity, 3))
Exercise 3: Create and Query a FAISS Index
Task:
Index a set of support articles and retrieve the two most relevant ones for a query.
Solution:
import faiss
import numpy as np
# Knowledge-base articles that will be searchable.
articles = [
    "Resetting your password from the login page",
    "Changing your profile photo and bio",
    "Canceling your subscription and managing refunds",
]

# Generate embeddings: one float32 row per article, L2-normalized in place
vectors = list(map(get_embedding, articles))
matrix = np.array(vectors).astype("float32")
faiss.normalize_L2(matrix)

# Create FAISS index (inner-product index sized to the embedding width)
index = faiss.IndexFlatIP(matrix.shape[1])
index.add(matrix)

# Query: embedded and normalized the same way as the articles
query = "How do I cancel my membership?"
query_vec = np.array([get_embedding(query)]).astype("float32")
faiss.normalize_L2(query_vec)

# Search for the two nearest articles; row 0 belongs to the single query
similarities, idxs = index.search(query_vec, k=2)
for match in (articles[i] for i in idxs[0]):
    print("📄 Match:", match)
Exercise 4: Build a Simple Semantic Search App with Pinecone
Task:
Use Pinecone to store embeddings and return the best-matching document for a user question.
Solution:
import pinecone
# pinecone>=3.0 removed the module-level `pinecone.init(...)`; all calls now go
# through a client object created with the API key.
pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create or connect to index
index_name = "quick-search"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the vectors being upserted
        metric="cosine",
        # Pod-based index in the environment named by PINECONE_ENV.
        # NOTE(review): serverless projects need
        # pinecone.ServerlessSpec(cloud=..., region=...) here instead —
        # confirm against your Pinecone project.
        spec=pinecone.PodSpec(environment=os.getenv("PINECONE_ENV")),
    )
index = pc.Index(index_name)

# Documents to store, keyed by the vector ID used in the index.
docs = {
    "a1": "How to export data from your dashboard",
    "a2": "Steps to upgrade your subscription plan",
    "a3": "Changing your registered email address",
}

# Insert documents as (id, embedding, metadata) tuples; the original text is
# kept in the metadata so it can be shown alongside a match.
to_upsert = [
    (doc_id, get_embedding(text), {"text": text})
    for doc_id, text in docs.items()
]
index.upsert(vectors=to_upsert)

# Search
user_query = "Can I switch to a higher plan?"
query_vec = get_embedding(user_query)
results = index.query(vector=query_vec, top_k=1, include_metadata=True)

# Guard against an empty result list so a miss does not raise IndexError.
matches = results["matches"]
if matches:
    print("🔍 Top match:", matches[0]["metadata"]["text"])
else:
    print("🔍 No match found")
Exercise 5: Recommend Similar Blog Posts
Task:
Use embeddings to recommend blog posts that are semantically related to a given one.
Solution:
# Candidate posts to recommend from.
posts = [
    "10 Python tricks you didn’t know",
    "Deploying machine learning models with FastAPI",
    "Understanding transformer models in NLP",
    "Intro to JavaScript for data scientists",
]

# Embed posts
post_embeddings = list(map(get_embedding, posts))

# Define base post
base = "How transformers work in deep learning"
base_vec = get_embedding(base)

# Compute similarity: cosine between the base post and every candidate
scores = [
    dot(base_vec, vec) / (norm(base_vec) * norm(vec))
    for vec in post_embeddings
]

# Positions of the two highest-scoring posts, best first
top_indices = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:2]

print("📚 Recommended Posts:")
for title in (posts[i] for i in top_indices):
    print("-", title)
Summary of What You Practiced
- Creating OpenAI embeddings from raw text
- Comparing vectors with cosine similarity
- Building a FAISS index for fast local vector search
- Using Pinecone to persist and query vectors in the cloud
- Making real-world recommendations based on semantic similarity
These exercises have equipped you with the core workflows that power intelligent assistants, chatbots, recommendation systems, and semantic search tools at scale.
Practical Exercises — Chapter 3
Exercise 1: Generate Embeddings for a List of Texts
Task:
Create embeddings for a list of product descriptions using OpenAI’s text-embedding-3-small
model and print the dimension of each vector.
Solution:
import openai
import os
from dotenv import load_dotenv
# Load variables from a local .env file, then hand the key to the OpenAI SDK.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Product descriptions to embed.
products = [
    "Eco-friendly water bottle made from recycled plastic",
    "Wireless noise-cancelling headphones",
    "Ergonomic office chair with lumbar support",
]

# Embed each description and report the size of the returned vector.
embeddings = []
for product in products:
    # openai>=1.0 removed `openai.Embedding.create`; the module-level
    # `openai.embeddings.create` is its replacement.
    response = openai.embeddings.create(input=product, model="text-embedding-3-small")
    # The v1 SDK returns a typed response object, so the vector is read
    # through attributes rather than dictionary keys.
    vector = response.data[0].embedding
    embeddings.append(vector)
    print(f"🔢 Vector length: {len(vector)}")
Exercise 2: Calculate Similarity Between Two Texts
Task:
Compare the semantic similarity between two user reviews using cosine similarity.
Solution:
from numpy import dot
from numpy.linalg import norm
# Two reviews that say roughly the same thing in different words.
review_1 = "The delivery was super fast and the packaging was great!"
review_2 = "Shipping was quick and everything arrived safely."


def get_embedding(text):
    """Return the text-embedding-3-small embedding vector for ``text``.

    Calls the OpenAI embeddings endpoint through the module-level client of
    the ``openai`` package (version 1.0 or newer, where the former
    ``openai.Embedding.create`` no longer exists). The package must already be
    imported and configured with an API key.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only) input, as returned by the API.
    """
    response = openai.embeddings.create(input=text, model="text-embedding-3-small")
    # v1 responses are typed objects: use attribute access, not dict keys.
    return response.data[0].embedding


vec1 = get_embedding(review_1)
vec2 = get_embedding(review_2)

# Cosine similarity: dot product divided by the product of the vector norms.
similarity = dot(vec1, vec2) / (norm(vec1) * norm(vec2))
print("🔗 Semantic Similarity:", round(similarity, 3))
Exercise 3: Create and Query a FAISS Index
Task:
Index a set of support articles and retrieve the two most relevant ones for a query.
Solution:
import faiss
import numpy as np
# Knowledge-base articles that will be searchable.
articles = [
    "Resetting your password from the login page",
    "Changing your profile photo and bio",
    "Canceling your subscription and managing refunds",
]

# Generate embeddings: one float32 row per article, L2-normalized in place
vectors = list(map(get_embedding, articles))
matrix = np.array(vectors).astype("float32")
faiss.normalize_L2(matrix)

# Create FAISS index (inner-product index sized to the embedding width)
index = faiss.IndexFlatIP(matrix.shape[1])
index.add(matrix)

# Query: embedded and normalized the same way as the articles
query = "How do I cancel my membership?"
query_vec = np.array([get_embedding(query)]).astype("float32")
faiss.normalize_L2(query_vec)

# Search for the two nearest articles; row 0 belongs to the single query
similarities, idxs = index.search(query_vec, k=2)
for match in (articles[i] for i in idxs[0]):
    print("📄 Match:", match)
Exercise 4: Build a Simple Semantic Search App with Pinecone
Task:
Use Pinecone to store embeddings and return the best-matching document for a user question.
Solution:
import pinecone
# pinecone>=3.0 removed the module-level `pinecone.init(...)`; all calls now go
# through a client object created with the API key.
pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create or connect to index
index_name = "quick-search"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the vectors being upserted
        metric="cosine",
        # Pod-based index in the environment named by PINECONE_ENV.
        # NOTE(review): serverless projects need
        # pinecone.ServerlessSpec(cloud=..., region=...) here instead —
        # confirm against your Pinecone project.
        spec=pinecone.PodSpec(environment=os.getenv("PINECONE_ENV")),
    )
index = pc.Index(index_name)

# Documents to store, keyed by the vector ID used in the index.
docs = {
    "a1": "How to export data from your dashboard",
    "a2": "Steps to upgrade your subscription plan",
    "a3": "Changing your registered email address",
}

# Insert documents as (id, embedding, metadata) tuples; the original text is
# kept in the metadata so it can be shown alongside a match.
to_upsert = [
    (doc_id, get_embedding(text), {"text": text})
    for doc_id, text in docs.items()
]
index.upsert(vectors=to_upsert)

# Search
user_query = "Can I switch to a higher plan?"
query_vec = get_embedding(user_query)
results = index.query(vector=query_vec, top_k=1, include_metadata=True)

# Guard against an empty result list so a miss does not raise IndexError.
matches = results["matches"]
if matches:
    print("🔍 Top match:", matches[0]["metadata"]["text"])
else:
    print("🔍 No match found")
Exercise 5: Recommend Similar Blog Posts
Task:
Use embeddings to recommend blog posts that are semantically related to a given one.
Solution:
# Candidate posts to recommend from.
posts = [
    "10 Python tricks you didn’t know",
    "Deploying machine learning models with FastAPI",
    "Understanding transformer models in NLP",
    "Intro to JavaScript for data scientists",
]

# Embed posts
post_embeddings = list(map(get_embedding, posts))

# Define base post
base = "How transformers work in deep learning"
base_vec = get_embedding(base)

# Compute similarity: cosine between the base post and every candidate
scores = [
    dot(base_vec, vec) / (norm(base_vec) * norm(vec))
    for vec in post_embeddings
]

# Positions of the two highest-scoring posts, best first
top_indices = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:2]

print("📚 Recommended Posts:")
for title in (posts[i] for i in top_indices):
    print("-", title)
Summary of What You Practiced
- Creating OpenAI embeddings from raw text
- Comparing vectors with cosine similarity
- Building a FAISS index for fast local vector search
- Using Pinecone to persist and query vectors in the cloud
- Making real-world recommendations based on semantic similarity
These exercises have equipped you with the core workflows that power intelligent assistants, chatbots, recommendation systems, and semantic search tools at scale.
Practical Exercises — Chapter 3
Exercise 1: Generate Embeddings for a List of Texts
Task:
Create embeddings for a list of product descriptions using OpenAI’s text-embedding-3-small
model and print the dimension of each vector.
Solution:
import openai
import os
from dotenv import load_dotenv
# Load variables from a local .env file, then hand the key to the OpenAI SDK.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Product descriptions to embed.
products = [
    "Eco-friendly water bottle made from recycled plastic",
    "Wireless noise-cancelling headphones",
    "Ergonomic office chair with lumbar support",
]

# Embed each description and report the size of the returned vector.
embeddings = []
for product in products:
    # openai>=1.0 removed `openai.Embedding.create`; the module-level
    # `openai.embeddings.create` is its replacement.
    response = openai.embeddings.create(input=product, model="text-embedding-3-small")
    # The v1 SDK returns a typed response object, so the vector is read
    # through attributes rather than dictionary keys.
    vector = response.data[0].embedding
    embeddings.append(vector)
    print(f"🔢 Vector length: {len(vector)}")
Exercise 2: Calculate Similarity Between Two Texts
Task:
Compare the semantic similarity between two user reviews using cosine similarity.
Solution:
from numpy import dot
from numpy.linalg import norm
# Two reviews that say roughly the same thing in different words.
review_1 = "The delivery was super fast and the packaging was great!"
review_2 = "Shipping was quick and everything arrived safely."


def get_embedding(text):
    """Return the text-embedding-3-small embedding vector for ``text``.

    Calls the OpenAI embeddings endpoint through the module-level client of
    the ``openai`` package (version 1.0 or newer, where the former
    ``openai.Embedding.create`` no longer exists). The package must already be
    imported and configured with an API key.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only) input, as returned by the API.
    """
    response = openai.embeddings.create(input=text, model="text-embedding-3-small")
    # v1 responses are typed objects: use attribute access, not dict keys.
    return response.data[0].embedding


vec1 = get_embedding(review_1)
vec2 = get_embedding(review_2)

# Cosine similarity: dot product divided by the product of the vector norms.
similarity = dot(vec1, vec2) / (norm(vec1) * norm(vec2))
print("🔗 Semantic Similarity:", round(similarity, 3))
Exercise 3: Create and Query a FAISS Index
Task:
Index a set of support articles and retrieve the two most relevant ones for a query.
Solution:
import faiss
import numpy as np
# Knowledge-base articles that will be searchable.
articles = [
    "Resetting your password from the login page",
    "Changing your profile photo and bio",
    "Canceling your subscription and managing refunds",
]

# Generate embeddings: one float32 row per article, L2-normalized in place
vectors = list(map(get_embedding, articles))
matrix = np.array(vectors).astype("float32")
faiss.normalize_L2(matrix)

# Create FAISS index (inner-product index sized to the embedding width)
index = faiss.IndexFlatIP(matrix.shape[1])
index.add(matrix)

# Query: embedded and normalized the same way as the articles
query = "How do I cancel my membership?"
query_vec = np.array([get_embedding(query)]).astype("float32")
faiss.normalize_L2(query_vec)

# Search for the two nearest articles; row 0 belongs to the single query
similarities, idxs = index.search(query_vec, k=2)
for match in (articles[i] for i in idxs[0]):
    print("📄 Match:", match)
Exercise 4: Build a Simple Semantic Search App with Pinecone
Task:
Use Pinecone to store embeddings and return the best-matching document for a user question.
Solution:
import pinecone
# pinecone>=3.0 removed the module-level `pinecone.init(...)`; all calls now go
# through a client object created with the API key.
pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create or connect to index
index_name = "quick-search"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the vectors being upserted
        metric="cosine",
        # Pod-based index in the environment named by PINECONE_ENV.
        # NOTE(review): serverless projects need
        # pinecone.ServerlessSpec(cloud=..., region=...) here instead —
        # confirm against your Pinecone project.
        spec=pinecone.PodSpec(environment=os.getenv("PINECONE_ENV")),
    )
index = pc.Index(index_name)

# Documents to store, keyed by the vector ID used in the index.
docs = {
    "a1": "How to export data from your dashboard",
    "a2": "Steps to upgrade your subscription plan",
    "a3": "Changing your registered email address",
}

# Insert documents as (id, embedding, metadata) tuples; the original text is
# kept in the metadata so it can be shown alongside a match.
to_upsert = [
    (doc_id, get_embedding(text), {"text": text})
    for doc_id, text in docs.items()
]
index.upsert(vectors=to_upsert)

# Search
user_query = "Can I switch to a higher plan?"
query_vec = get_embedding(user_query)
results = index.query(vector=query_vec, top_k=1, include_metadata=True)

# Guard against an empty result list so a miss does not raise IndexError.
matches = results["matches"]
if matches:
    print("🔍 Top match:", matches[0]["metadata"]["text"])
else:
    print("🔍 No match found")
Exercise 5: Recommend Similar Blog Posts
Task:
Use embeddings to recommend blog posts that are semantically related to a given one.
Solution:
# Candidate posts to recommend from.
posts = [
    "10 Python tricks you didn’t know",
    "Deploying machine learning models with FastAPI",
    "Understanding transformer models in NLP",
    "Intro to JavaScript for data scientists",
]

# Embed posts
post_embeddings = list(map(get_embedding, posts))

# Define base post
base = "How transformers work in deep learning"
base_vec = get_embedding(base)

# Compute similarity: cosine between the base post and every candidate
scores = [
    dot(base_vec, vec) / (norm(base_vec) * norm(vec))
    for vec in post_embeddings
]

# Positions of the two highest-scoring posts, best first
top_indices = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:2]

print("📚 Recommended Posts:")
for title in (posts[i] for i in top_indices):
    print("-", title)
Summary of What You Practiced
- Creating OpenAI embeddings from raw text
- Comparing vectors with cosine similarity
- Building a FAISS index for fast local vector search
- Using Pinecone to persist and query vectors in the cloud
- Making real-world recommendations based on semantic similarity
These exercises have equipped you with the core workflows that power intelligent assistants, chatbots, recommendation systems, and semantic search tools at scale.