Code icon

The App is Under a Quick Maintenance

We apologize for the inconvenience. Please come back later

Menu iconMenu iconNatural Language Processing with Python Updated Edition
Natural Language Processing with Python Updated Edition

Chapter 6: Sentiment Analysis

Practical Exercises

Exercise 1: Rule-Based Sentiment Analysis with TextBlob

Task: Perform sentiment analysis on the following sentences using TextBlob:

  • "The weather is terrible today."
  • "I am so excited about the new movie release."

Solution:

from textblob import TextBlob

# Sentences to score
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]


def describe(text):
    """Print the TextBlob polarity and subjectivity of *text*, then a blank line."""
    sentiment = TextBlob(text).sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()


# Report on every sentence in order
for text in texts:
    describe(text)

Output:

Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0

Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0

Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn

Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:

  • "I hate waiting in long lines."
  • "The food at the restaurant was fantastic."

Solution:

from afinn import Afinn

# One shared AFINN scorer for all sentences
afinn = Afinn()

# Sentences to score
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]


def label_for(score):
    """Map a numeric score to "Positive", "Negative" or "Neutral" by its sign."""
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"


# Score each sentence and print the score with its label
for text in texts:
    sentiment_score = afinn.score(text)
    sentiment = label_for(sentiment_score)
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()

Output:

Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative

Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive

Exercise 3: Sentiment Analysis with Logistic Regression

Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences BEFORE extracting features so the held-out
# sentence cannot influence the vocabulary or the IDF weights (no leakage).
# With four samples and test_size=0.25 the test set holds one sentence.
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary and weights from the training sentences only,
# then apply that fitted transformation to the test sentence
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# A one-sentence test set cannot contain both classes, so some per-class
# metrics are undefined; report them as 0 instead of emitting warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

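Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the support counts in an actual run will differ):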

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Exercise 4: Sentiment Analysis with LSTMs

Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
# Keras fit/evaluate expect array targets; a plain Python list of ints
# paired with NumPy inputs is rejected, so store the labels as an array.
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Hyperparameters shared by the tokenizer, the padding step and the model
VOCAB_SIZE = 5000  # tokenizer word cap and Embedding input_dim
MAX_LEN = 10       # every sequence is padded/truncated to this length

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Define the LSTM model
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=50, input_length=MAX_LEN))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single probability for the positive class

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
# NOTE: no oov_token is configured, so words absent from the training corpus
# are presumably dropped by texts_to_sequences — confirm against the Keras docs.
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

Output:

Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Exercise 5: Sentiment Analysis with BERT

Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

MAX_LENGTH = 10  # token budget per sentence, shared by training and inference

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data.
# NumPy output (rather than TF tensors) lets scikit-learn index rows when splitting.
X = tokenizer(corpus, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')

# Split ids, attention masks and labels in one call so rows stay aligned
(ids_train, ids_test,
 mask_train, mask_test,
 y_train, y_test) = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)

# Pass the attention mask with the ids so padding positions are ignored
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model (the model outputs raw logits, hence from_logits=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')
prediction = model.predict({
    'input_ids': new_text_enc['input_ids'],
    'attention_mask': new_text_enc['attention_mask'],
})
# One input sentence, so take the arg-max class of the first (only) row
print("Prediction:", "Positive" if np.argmax(prediction.logits, axis=-1)[0] == 1 else "Negative")

Output:

Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Practical Exercises

Exercise 1: Rule-Based Sentiment Analysis with TextBlob

Task: Perform sentiment analysis on the following sentences using TextBlob:

  • "The weather is terrible today."
  • "I am so excited about the new movie release."

Solution:

from textblob import TextBlob

# Sentences to score
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]


def describe(text):
    """Print the TextBlob polarity and subjectivity of *text*, then a blank line."""
    sentiment = TextBlob(text).sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()


# Report on every sentence in order
for text in texts:
    describe(text)

Output:

Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0

Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0

Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn

Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:

  • "I hate waiting in long lines."
  • "The food at the restaurant was fantastic."

Solution:

from afinn import Afinn

# One shared AFINN scorer for all sentences
afinn = Afinn()

# Sentences to score
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]


def label_for(score):
    """Map a numeric score to "Positive", "Negative" or "Neutral" by its sign."""
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"


# Score each sentence and print the score with its label
for text in texts:
    sentiment_score = afinn.score(text)
    sentiment = label_for(sentiment_score)
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()

Output:

Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative

Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive

Exercise 3: Sentiment Analysis with Logistic Regression

Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences BEFORE extracting features so the held-out
# sentence cannot influence the vocabulary or the IDF weights (no leakage).
# With four samples and test_size=0.25 the test set holds one sentence.
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary and weights from the training sentences only,
# then apply that fitted transformation to the test sentence
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# A one-sentence test set cannot contain both classes, so some per-class
# metrics are undefined; report them as 0 instead of emitting warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

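Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the support counts in an actual run will differ):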

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Exercise 4: Sentiment Analysis with LSTMs

Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
# Keras fit/evaluate expect array targets; a plain Python list of ints
# paired with NumPy inputs is rejected, so store the labels as an array.
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Hyperparameters shared by the tokenizer, the padding step and the model
VOCAB_SIZE = 5000  # tokenizer word cap and Embedding input_dim
MAX_LEN = 10       # every sequence is padded/truncated to this length

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Define the LSTM model
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=50, input_length=MAX_LEN))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single probability for the positive class

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
# NOTE: no oov_token is configured, so words absent from the training corpus
# are presumably dropped by texts_to_sequences — confirm against the Keras docs.
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

Output:

Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Exercise 5: Sentiment Analysis with BERT

Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

MAX_LENGTH = 10  # token budget per sentence, shared by training and inference

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data.
# NumPy output (rather than TF tensors) lets scikit-learn index rows when splitting.
X = tokenizer(corpus, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')

# Split ids, attention masks and labels in one call so rows stay aligned
(ids_train, ids_test,
 mask_train, mask_test,
 y_train, y_test) = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)

# Pass the attention mask with the ids so padding positions are ignored
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model (the model outputs raw logits, hence from_logits=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')
prediction = model.predict({
    'input_ids': new_text_enc['input_ids'],
    'attention_mask': new_text_enc['attention_mask'],
})
# One input sentence, so take the arg-max class of the first (only) row
print("Prediction:", "Positive" if np.argmax(prediction.logits, axis=-1)[0] == 1 else "Negative")

Output:

Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Practical Exercises

Exercise 1: Rule-Based Sentiment Analysis with TextBlob

Task: Perform sentiment analysis on the following sentences using TextBlob:

  • "The weather is terrible today."
  • "I am so excited about the new movie release."

Solution:

from textblob import TextBlob

# Sentences to score
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]


def describe(text):
    """Print the TextBlob polarity and subjectivity of *text*, then a blank line."""
    sentiment = TextBlob(text).sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()


# Report on every sentence in order
for text in texts:
    describe(text)

Output:

Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0

Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0

Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn

Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:

  • "I hate waiting in long lines."
  • "The food at the restaurant was fantastic."

Solution:

from afinn import Afinn

# One shared AFINN scorer for all sentences
afinn = Afinn()

# Sentences to score
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]


def label_for(score):
    """Map a numeric score to "Positive", "Negative" or "Neutral" by its sign."""
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"


# Score each sentence and print the score with its label
for text in texts:
    sentiment_score = afinn.score(text)
    sentiment = label_for(sentiment_score)
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()

Output:

Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative

Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive

Exercise 3: Sentiment Analysis with Logistic Regression

Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences BEFORE extracting features so the held-out
# sentence cannot influence the vocabulary or the IDF weights (no leakage).
# With four samples and test_size=0.25 the test set holds one sentence.
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary and weights from the training sentences only,
# then apply that fitted transformation to the test sentence
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# A one-sentence test set cannot contain both classes, so some per-class
# metrics are undefined; report them as 0 instead of emitting warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

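Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the support counts in an actual run will differ):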

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Exercise 4: Sentiment Analysis with LSTMs

Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
# Keras fit/evaluate expect array targets; a plain Python list of ints
# paired with NumPy inputs is rejected, so store the labels as an array.
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Hyperparameters shared by the tokenizer, the padding step and the model
VOCAB_SIZE = 5000  # tokenizer word cap and Embedding input_dim
MAX_LEN = 10       # every sequence is padded/truncated to this length

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Define the LSTM model
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=50, input_length=MAX_LEN))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single probability for the positive class

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
# NOTE: no oov_token is configured, so words absent from the training corpus
# are presumably dropped by texts_to_sequences — confirm against the Keras docs.
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

Output:

Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Exercise 5: Sentiment Analysis with BERT

Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

MAX_LENGTH = 10  # token budget per sentence, shared by training and inference

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data.
# NumPy output (rather than TF tensors) lets scikit-learn index rows when splitting.
X = tokenizer(corpus, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')

# Split ids, attention masks and labels in one call so rows stay aligned
(ids_train, ids_test,
 mask_train, mask_test,
 y_train, y_test) = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)

# Pass the attention mask with the ids so padding positions are ignored
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model (the model outputs raw logits, hence from_logits=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')
prediction = model.predict({
    'input_ids': new_text_enc['input_ids'],
    'attention_mask': new_text_enc['attention_mask'],
})
# One input sentence, so take the arg-max class of the first (only) row
print("Prediction:", "Positive" if np.argmax(prediction.logits, axis=-1)[0] == 1 else "Negative")

Output:

Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Practical Exercises

Exercise 1: Rule-Based Sentiment Analysis with TextBlob

Task: Perform sentiment analysis on the following sentences using TextBlob:

  • "The weather is terrible today."
  • "I am so excited about the new movie release."

Solution:

from textblob import TextBlob

# Sentences to score
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]


def describe(text):
    """Print the TextBlob polarity and subjectivity of *text*, then a blank line."""
    sentiment = TextBlob(text).sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()


# Report on every sentence in order
for text in texts:
    describe(text)

Output:

Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0

Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0

Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn

Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:

  • "I hate waiting in long lines."
  • "The food at the restaurant was fantastic."

Solution:

from afinn import Afinn

# One shared AFINN scorer for all sentences
afinn = Afinn()

# Sentences to score
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]


def label_for(score):
    """Map a numeric score to "Positive", "Negative" or "Neutral" by its sign."""
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"


# Score each sentence and print the score with its label
for text in texts:
    sentiment_score = afinn.score(text)
    sentiment = label_for(sentiment_score)
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()

Output:

Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative

Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive

Exercise 3: Sentiment Analysis with Logistic Regression

Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences BEFORE extracting features so the held-out
# sentence cannot influence the vocabulary or the IDF weights (no leakage).
# With four samples and test_size=0.25 the test set holds one sentence.
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary and weights from the training sentences only,
# then apply that fitted transformation to the test sentence
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# A one-sentence test set cannot contain both classes, so some per-class
# metrics are undefined; report them as 0 instead of emitting warnings.
report = classification_report(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

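Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the support counts in an actual run will differ):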

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Exercise 4: Sentiment Analysis with LSTMs

Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
# Keras fit/evaluate expect array targets; a plain Python list of ints
# paired with NumPy inputs is rejected, so store the labels as an array.
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Hyperparameters shared by the tokenizer, the padding step and the model
VOCAB_SIZE = 5000  # tokenizer word cap and Embedding input_dim
MAX_LEN = 10       # every sequence is padded/truncated to this length

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=MAX_LEN)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Define the LSTM model
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=50, input_length=MAX_LEN))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))  # single probability for the positive class

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
# NOTE: no oov_token is configured, so words absent from the training corpus
# are presumably dropped by texts_to_sequences — confirm against the Keras docs.
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=MAX_LEN)
prediction = model.predict(new_text_padded)
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")

Output:

Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive

Exercise 5: Sentiment Analysis with BERT

Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:

  • Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
  • Labels: [1, 0, 1, 0]

Solution:

import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split

# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible."
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

MAX_LENGTH = 10  # token budget per sentence, shared by training and inference

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data.
# NumPy output (rather than TF tensors) lets scikit-learn index rows when splitting.
X = tokenizer(corpus, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')

# Split ids, attention masks and labels in one call so rows stay aligned
(ids_train, ids_test,
 mask_train, mask_test,
 y_train, y_test) = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)

# Pass the attention mask with the ids so padding positions are ignored
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model (the model outputs raw logits, hence from_logits=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors='np')
prediction = model.predict({
    'input_ids': new_text_enc['input_ids'],
    'attention_mask': new_text_enc['attention_mask'],
})
# One input sentence, so take the arg-max class of the first (only) row
print("Prediction:", "Positive" if np.argmax(prediction.logits, axis=-1)[0] == 1 else "Negative")

Output:

Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive