Natural Language Processing with Python Updated Edition

Chapter 5: Syntax and Parsing

Practical Exercises

Exercise 1: Parts of Speech (POS) Tagging

Task: Perform POS tagging on the following sentence: "The quick brown fox jumps over the lazy dog."

Solution:

import nltk
from nltk import word_tokenize, pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# On NLTK 3.9+, the required resources are named 'punkt_tab' and 'averaged_perceptron_tagger_eng'

# Sample text
text = "The quick brown fox jumps over the lazy dog."

# Tokenize the text into words
tokens = word_tokenize(text)

# Perform POS tagging
pos_tags = pos_tag(tokens)

print("POS Tags:")
print(pos_tags)

Output:

POS Tags:
[('The', 'DT'), ('quick', 'JJ'), ('brown', 'JJ'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]
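
If any of the Penn Treebank tags are unfamiliar, NLTK can describe them. An optional lookup, assuming the 'tagsets' resource has been downloaded:

nltk.download('tagsets')
nltk.help.upenn_tagset('VBZ')  # prints: verb, present tense, 3rd person singular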

Exercise 2: Named Entity Recognition (NER)

Task: Perform Named Entity Recognition on the following sentence: "Barack Obama was born on August 4, 1961, in Honolulu, Hawaii."

Solution:

import spacy

# Load the pre-trained spaCy model
nlp = spacy.load('en_core_web_sm')

# Sample text
text = "Barack Obama was born on August 4, 1961, in Honolulu, Hawaii."

# Process the text with the spaCy model
doc = nlp(text)

# Print named entities with their labels
print("Named Entities:")
for ent in doc.ents:
    print(ent.text, ent.label_)

Output:

Named Entities:
Barack Obama PERSON
August 4, 1961 DATE
Honolulu GPE
Hawaii GPE
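
If an entity label is unclear, spacy.explain returns a short description, and displaCy can highlight the entities inline. A quick sketch (the render call assumes a Jupyter-style environment, as in Exercise 4):

print(spacy.explain("GPE"))  # "Countries, cities, states"

from spacy import displacy
displacy.render(doc, style="ent", jupyter=True)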

Exercise 3: Training a Custom NER Model

Task: Train a custom NER model to recognize a new entity type "GADGET" using the following sentences:

  • "Apple is releasing a new iPhone."
  • "The new iPad Pro is amazing."

Solution:

import random

import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

# Create a blank English model
nlp = spacy.blank("en")

# Create a new NER component and add it to the pipeline
ner = nlp.add_pipe("ner")

# Add labels to the NER component
ner.add_label("GADGET")

# Sample training data
TRAIN_DATA = [
    ("Apple is releasing a new iPhone.", {"entities": [(26, 32, "GADGET")]}),
    ("The new iPad Pro is amazing.", {"entities": [(8, 16, "GADGET")]}),
]

# Build Example objects; in spaCy v3, nlp.update expects Examples, not plain Docs
examples = []
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    examples.append(Example.from_dict(doc, annotations))

# Train the NER model (initialize infers settings from the examples and returns an optimizer)
optimizer = nlp.initialize(lambda: examples)
for epoch in range(10):
    random.shuffle(examples)
    losses = {}
    batches = minibatch(examples, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        nlp.update(batch, drop=0.5, sgd=optimizer, losses=losses)
    print("Losses", losses)

# Test the trained model
doc = nlp("I just bought a new iPhone.")
print("Named Entities:", [(ent.text, ent.label_) for ent in doc.ents])

Output (exact loss values will vary from run to run):

Losses {'ner': 8.123456789}
Losses {'ner': 5.987654321}
...
Named Entities: [('iPhone', 'GADGET')]
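
Character offsets are a common source of silent training failures: a span that is off by one simply never aligns with a token boundary, so it is safer to compute offsets than to count by hand. A small helper sketch (the entity_offsets name is ours, not part of spaCy):

def entity_offsets(text, phrase, label):
    # str.find returns the start index; the end offset is exclusive
    start = text.find(phrase)
    return (start, start + len(phrase), label)

print(entity_offsets("Apple is releasing a new iPhone.", "iPhone", "GADGET"))
# (25, 31, 'GADGET')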

Exercise 4: Dependency Parsing

Task: Perform dependency parsing on the following sentence: "She enjoys reading books."

Solution:

import spacy

# Load the pre-trained spaCy model
nlp = spacy.load('en_core_web_sm')

# Sample text
text = "She enjoys reading books."

# Process the text with the spaCy model
doc = nlp(text)

# Print dependency parsing results
print("Dependency Parsing:")
for token in doc:
    print(f"{token.text} ({token.dep_}): {token.head.text}")

# Visualize the dependency tree (requires jupyter notebook or similar environment)
from spacy import displacy
displacy.render(doc, style="dep", jupyter=True)

Output:

Dependency Parsing:
She (nsubj): enjoys
enjoys (ROOT): enjoys
reading (xcomp): enjoys
books (dobj): reading
. (punct): enjoys
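
The parse is a tree, so it can be traversed directly rather than only printed. A short sketch using the same doc, listing each token's syntactic children and the noun chunks spaCy derives from the parse:

for token in doc:
    print(token.text, "->", [child.text for child in token.children])

print("Noun chunks:", [chunk.text for chunk in doc.noun_chunks])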

Exercise 5: Training a Custom Dependency Parser

Task: Train a custom dependency parser using the following sentences:

  • "She enjoys playing tennis."
  • "I like reading books."

Solution:

import random

import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

# Create a blank English model
nlp = spacy.blank("en")

# Create a new parser component and add it to the pipeline
parser = nlp.add_pipe("parser")

# Define labels for the parser (the ROOT label is picked up from the training data)
parser.add_label("nsubj")
parser.add_label("xcomp")
parser.add_label("dobj")
parser.add_label("punct")

# Sample training data: one head index and one dependency label per token
# (both sentences tokenize to five tokens, including the final period)
TRAIN_DATA = [
    ("She enjoys playing tennis.", {"heads": [1, 1, 1, 2, 1], "deps": ["nsubj", "ROOT", "xcomp", "dobj", "punct"]}),
    ("I like reading books.", {"heads": [1, 1, 1, 2, 1], "deps": ["nsubj", "ROOT", "xcomp", "dobj", "punct"]}),
]

# Build Example objects; in spaCy v3, nlp.update expects Examples, not plain Docs
examples = []
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    examples.append(Example.from_dict(doc, annotations))

# Train the parser (initialize infers labels from the examples and returns an optimizer)
optimizer = nlp.initialize(lambda: examples)
for epoch in range(10):
    random.shuffle(examples)
    losses = {}
    batches = minibatch(examples, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        nlp.update(batch, drop=0.5, sgd=optimizer, losses=losses)
    print("Losses", losses)

# Test the trained model
doc = nlp("She enjoys reading books.")
for token in doc:
    print(f"{token.text} ({token.dep_}): {token.head.text}")

Output (exact losses and labels will vary on such a small training set):

Losses {'parser': 7.123456789}
Losses {'parser': 5.987654321}
...
She (nsubj): enjoys
enjoys (ROOT): enjoys
reading (xcomp): enjoys
books (dobj): reading
. (punct): enjoys

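To sanity-check the trained parser, spaCy can score it against gold annotations. A minimal sketch reusing the examples list from training; on a two-sentence dataset this only confirms the model fits its own data, not that it generalizes:

scores = nlp.evaluate(examples)
print("UAS:", scores["dep_uas"], "LAS:", scores["dep_las"])
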
These exercises provide hands-on experience with Parts of Speech (POS) tagging, Named Entity Recognition (NER), and Dependency Parsing, reinforcing the concepts covered in Chapter 5.
