Chapter 5: Syntax and Parsing
Practical Exercises
Exercise 1: Parts of Speech (POS) Tagging
Task: Perform POS tagging on the following sentence: "The quick brown fox jumps over the lazy dog."
Solution:
import nltk
from nltk import word_tokenize, pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# Sample text
text = "The quick brown fox jumps over the lazy dog."
# Tokenize the text into words
tokens = word_tokenize(text)
# Perform POS tagging
pos_tags = pos_tag(tokens)
print("POS Tags:")
print(pos_tags)
Output:
POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'JJ'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]
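If the Penn Treebank abbreviations (DT, JJ, VBZ, ...) are unfamiliar, NLTK can print their definitions. A minimal sketch, assuming the 'tagsets' resource is available for download:
import nltk
nltk.download('tagsets')
# Print the definition and examples for a Penn Treebank tag, e.g. JJ (adjective)
nltk.help.upenn_tagset('JJ')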
Exercise 2: Named Entity Recognition (NER)
Task: Perform Named Entity Recognition on the following sentence: "Barack Obama was born on August 4, 1961, in Honolulu, Hawaii."
Solution:
import spacy
# Load the pre-trained spaCy model
nlp = spacy.load('en_core_web_sm')
# Sample text
text = "Barack Obama was born on August 4, 1961, in Honolulu, Hawaii."
# Process the text with the spaCy model
doc = nlp(text)
# Print named entities with their labels
print("Named Entities:")
for ent in doc.ents:
    print(ent.text, ent.label_)
Output:
Named Entities:
Barack Obama PERSON
August 4, 1961 DATE
Honolulu GPE
Hawaii GPE
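spaCy can also highlight the recognized entities directly in the text with displacy. A short sketch, reusing the doc from the solution above (use displacy.serve instead of render outside a notebook):
from spacy import displacy
# Render the entity spans inline (requires a Jupyter-style environment)
displacy.render(doc, style="ent", jupyter=True)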
Exercise 3: Training a Custom NER Model
Task: Train a custom NER model to recognize a new entity type "GADGET" using the following sentences:
- "Apple is releasing a new iPhone."
- "The new iPad Pro is amazing."
Solution:
import random
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding
# Create a blank English pipeline
nlp = spacy.blank("en")
# Create a new NER component and add it to the pipeline
ner = nlp.add_pipe("ner")
# Add the new label to the NER component
ner.add_label("GADGET")
# Sample training data (character offsets must line up with token boundaries)
TRAIN_DATA = [
    ("Apple is releasing a new iPhone.", {"entities": [(25, 31, "GADGET")]}),
    ("The new iPad Pro is amazing.", {"entities": [(8, 16, "GADGET")]}),
]
# Convert the training data to spaCy Example objects
examples = []
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    examples.append(Example.from_dict(doc, annotations))
# Initialize the pipeline and train the NER component
optimizer = nlp.initialize(lambda: examples)
for epoch in range(10):
    random.shuffle(examples)
    losses = {}
    batches = minibatch(examples, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        nlp.update(batch, drop=0.5, losses=losses, sgd=optimizer)
    print("Losses", losses)
# Test the trained model
doc = nlp("I just bought a new iPhone.")
print("Named Entities:", [(ent.text, ent.label_) for ent in doc.ents])
Output (representative run; exact loss values vary):
Losses {'ner': 8.123456789}
Losses {'ner': 5.987654321}
...
Named Entities: [('iPhone', 'GADGET')]
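A trained pipeline is only useful if it can be reused, so it is worth persisting it to disk. A minimal sketch; the directory name "gadget_ner" is chosen here for illustration:
# Save the trained pipeline to a directory
nlp.to_disk("gadget_ner")
# Reload it later and apply it to new text
nlp_loaded = spacy.load("gadget_ner")
doc = nlp_loaded("The iPad Pro sold out in a day.")
print([(ent.text, ent.label_) for ent in doc.ents])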
Exercise 4: Dependency Parsing
Task: Perform dependency parsing on the following sentence: "She enjoys reading books."
Solution:
import spacy
# Load the pre-trained spaCy model
nlp = spacy.load('en_core_web_sm')
# Sample text
text = "She enjoys reading books."
# Process the text with the spaCy model
doc = nlp(text)
# Print dependency parsing results
print("Dependency Parsing:")
for token in doc:
    print(f"{token.text} ({token.dep_}): {token.head.text}")
# Visualize the dependency tree (requires jupyter notebook or similar environment)
from spacy import displacy
displacy.render(doc, style="dep", jupyter=True)
Output:
Dependency Parsing:
She (nsubj): enjoys
enjoys (ROOT): enjoys
reading (xcomp): enjoys
books (dobj): reading
. (punct): enjoys
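The parse is a tree, so it can be traversed programmatically rather than just printed. A short sketch, assuming the same doc as above; token.children gives a token's direct dependents and token.subtree yields its whole phrase:
# Find the sentence root and list its direct dependents
root = [token for token in doc if token.dep_ == "ROOT"][0]
print("Root:", root.text)
print("Children of root:", [child.text for child in root.children])
# Collect the full subtree under "reading" (token index 2)
print("Subtree of 'reading':", [t.text for t in doc[2].subtree])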
Exercise 5: Training a Custom Dependency Parser
Task: Train a custom dependency parser using the following sentences:
- "She enjoys playing tennis."
- "I like reading books."
Solution:
import random
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding
# Create a blank English pipeline
nlp = spacy.blank("en")
# Create a new parser component and add it to the pipeline
parser = nlp.add_pipe("parser")
# Add the dependency labels used in the training data
for label in ("nsubj", "ROOT", "xcomp", "dobj", "punct"):
    parser.add_label(label)
# Sample training data: one head index and one dependency label per token
TRAIN_DATA = [
    ("She enjoys playing tennis.",
     {"heads": [1, 1, 1, 2, 1], "deps": ["nsubj", "ROOT", "xcomp", "dobj", "punct"]}),
    ("I like reading books.",
     {"heads": [1, 1, 1, 2, 1], "deps": ["nsubj", "ROOT", "xcomp", "dobj", "punct"]}),
]
# Convert the training data to spaCy Example objects
examples = []
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    examples.append(Example.from_dict(doc, annotations))
# Initialize the pipeline and train the parser
optimizer = nlp.initialize(lambda: examples)
for epoch in range(10):
    random.shuffle(examples)
    losses = {}
    batches = minibatch(examples, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        nlp.update(batch, drop=0.5, losses=losses, sgd=optimizer)
    print("Losses", losses)
# Test the trained model
doc = nlp("She enjoys reading books.")
for token in doc:
    print(f"{token.text} ({token.dep_}): {token.head.text}")
Output (representative run; exact loss values vary):
Losses {'parser': 7.123456789}
Losses {'parser': 5.987654321}
...
She (nsubj): enjoys
enjoys (ROOT): enjoys
reading (xcomp): enjoys
books (dobj): reading
. (punct): enjoys
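As a quick sanity check, the parser can be scored against the training examples themselves; on two sentences this only confirms that the model memorized its data, not that it generalizes. A sketch assuming the examples list from the solution and spaCy v3's scorer keys:
# Compute unlabeled (UAS) and labeled (LAS) attachment scores
scores = nlp.evaluate(examples)
print("UAS:", scores["dep_uas"], "LAS:", scores["dep_las"])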
These exercises provide hands-on experience with Parts of Speech (POS) tagging, Named Entity Recognition (NER), and Dependency Parsing, reinforcing the concepts covered in Chapter 5.