Chapter 6: Sentiment Analysis
Practical Exercises
Exercise 1: Rule-Based Sentiment Analysis with TextBlob
Task: Perform sentiment analysis on the following sentences using TextBlob:
- "The weather is terrible today."
- "I am so excited about the new movie release."
Solution:
from textblob import TextBlob
# Sample texts
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]

# Perform sentiment analysis: each TextBlob exposes its polarity and
# subjectivity through the `sentiment` property.
for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()  # blank line between results
Output:
Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0
Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0
Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn
Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:
- "I hate waiting in long lines."
- "The food at the restaurant was fantastic."
Solution:
from afinn import Afinn
# Initialize the Afinn sentiment analyzer
afinn = Afinn()

# Sample texts
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]

# Perform sentiment analysis: the sign of the score selects the label.
for text in texts:
    sentiment_score = afinn.score(text)
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()  # blank line between results
Output:
Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative
Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive
Exercise 3: Sentiment Analysis with Logistic Regression
Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences first so the vectorizer never sees the test data;
# fitting TF-IDF on the full corpus would leak test-set vocabulary and IDF
# statistics into training.
train_texts, test_texts, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary on the training sentences only, then reuse
# the fitted vectorizer to transform the test sentences.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model. With four sentences the test set holds one sample, so
# some per-class metrics can be undefined; zero_division=0 reports them as 0
# instead of emitting a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
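Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the report you obtain will show a total support of 1 rather than 2):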
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 1
1 1.00 1.00 1.00 1
accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
Exercise 4: Sentiment Analysis with LSTMs
Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
# 1 for positive, 0 for negative. Stored as a NumPy array so that the label
# splits returned by train_test_split are arrays too; Keras in TF 2.x does
# not accept a plain Python list of ints as targets.
labels = np.array([1, 0, 1, 0])

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=10)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.25, random_state=42
)

# Define the LSTM model; input_dim matches the tokenizer's num_words and
# input_length matches the padding length used above.
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=50, input_length=10))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text, preprocessed exactly like the corpus
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=10)
prediction = model.predict(new_text_padded)
# The sigmoid output is thresholded at 0.5
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")
Output:
Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Exercise 5: Sentiment Analysis with BERT
Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data. NumPy output is requested because
# train_test_split indexes its inputs with integer arrays, which TensorFlow
# tensors do not support.
X = tokenizer(corpus, padding=True, truncation=True, max_length=10, return_tensors='np')

# Split the data into training and testing sets, keeping every attention
# mask together with its input ids so padding tokens are ignored by BERT.
ids_train, ids_test, mask_train, mask_test, y_train, y_test = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model; the loss uses from_logits=True because the
# classification head is read through `.logits` below.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=10, return_tensors='np')
prediction = model.predict(
    {'input_ids': new_text_enc['input_ids'], 'attention_mask': new_text_enc['attention_mask']}
)
# Index 1 of the two logits corresponds to the positive label
print("Prediction:", "Positive" if np.argmax(prediction.logits) == 1 else "Negative")
Output:
Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Practical Exercises
Exercise 1: Rule-Based Sentiment Analysis with TextBlob
Task: Perform sentiment analysis on the following sentences using TextBlob:
- "The weather is terrible today."
- "I am so excited about the new movie release."
Solution:
from textblob import TextBlob
# Sample texts
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]

# Perform sentiment analysis: each TextBlob exposes its polarity and
# subjectivity through the `sentiment` property.
for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()  # blank line between results
Output:
Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0
Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0
Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn
Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:
- "I hate waiting in long lines."
- "The food at the restaurant was fantastic."
Solution:
from afinn import Afinn
# Initialize the Afinn sentiment analyzer
afinn = Afinn()

# Sample texts
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]

# Perform sentiment analysis: the sign of the score selects the label.
for text in texts:
    sentiment_score = afinn.score(text)
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()  # blank line between results
Output:
Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative
Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive
Exercise 3: Sentiment Analysis with Logistic Regression
Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences first so the vectorizer never sees the test data;
# fitting TF-IDF on the full corpus would leak test-set vocabulary and IDF
# statistics into training.
train_texts, test_texts, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary on the training sentences only, then reuse
# the fitted vectorizer to transform the test sentences.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model. With four sentences the test set holds one sample, so
# some per-class metrics can be undefined; zero_division=0 reports them as 0
# instead of emitting a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
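Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the report you obtain will show a total support of 1 rather than 2):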
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 1
1 1.00 1.00 1.00 1
accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
Exercise 4: Sentiment Analysis with LSTMs
Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
# 1 for positive, 0 for negative. Stored as a NumPy array so that the label
# splits returned by train_test_split are arrays too; Keras in TF 2.x does
# not accept a plain Python list of ints as targets.
labels = np.array([1, 0, 1, 0])

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=10)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.25, random_state=42
)

# Define the LSTM model; input_dim matches the tokenizer's num_words and
# input_length matches the padding length used above.
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=50, input_length=10))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text, preprocessed exactly like the corpus
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=10)
prediction = model.predict(new_text_padded)
# The sigmoid output is thresholded at 0.5
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")
Output:
Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Exercise 5: Sentiment Analysis with BERT
Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data. NumPy output is requested because
# train_test_split indexes its inputs with integer arrays, which TensorFlow
# tensors do not support.
X = tokenizer(corpus, padding=True, truncation=True, max_length=10, return_tensors='np')

# Split the data into training and testing sets, keeping every attention
# mask together with its input ids so padding tokens are ignored by BERT.
ids_train, ids_test, mask_train, mask_test, y_train, y_test = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model; the loss uses from_logits=True because the
# classification head is read through `.logits` below.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=10, return_tensors='np')
prediction = model.predict(
    {'input_ids': new_text_enc['input_ids'], 'attention_mask': new_text_enc['attention_mask']}
)
# Index 1 of the two logits corresponds to the positive label
print("Prediction:", "Positive" if np.argmax(prediction.logits) == 1 else "Negative")
Output:
Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Practical Exercises
Exercise 1: Rule-Based Sentiment Analysis with TextBlob
Task: Perform sentiment analysis on the following sentences using TextBlob:
- "The weather is terrible today."
- "I am so excited about the new movie release."
Solution:
from textblob import TextBlob
# Sample texts
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]

# Perform sentiment analysis: each TextBlob exposes its polarity and
# subjectivity through the `sentiment` property.
for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()  # blank line between results
Output:
Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0
Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0
Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn
Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:
- "I hate waiting in long lines."
- "The food at the restaurant was fantastic."
Solution:
from afinn import Afinn
# Initialize the Afinn sentiment analyzer
afinn = Afinn()

# Sample texts
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]

# Perform sentiment analysis: the sign of the score selects the label.
for text in texts:
    sentiment_score = afinn.score(text)
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()  # blank line between results
Output:
Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative
Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive
Exercise 3: Sentiment Analysis with Logistic Regression
Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences first so the vectorizer never sees the test data;
# fitting TF-IDF on the full corpus would leak test-set vocabulary and IDF
# statistics into training.
train_texts, test_texts, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary on the training sentences only, then reuse
# the fitted vectorizer to transform the test sentences.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model. With four sentences the test set holds one sample, so
# some per-class metrics can be undefined; zero_division=0 reports them as 0
# instead of emitting a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
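Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the report you obtain will show a total support of 1 rather than 2):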
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 1
1 1.00 1.00 1.00 1
accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
Exercise 4: Sentiment Analysis with LSTMs
Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
# 1 for positive, 0 for negative. Stored as a NumPy array so that the label
# splits returned by train_test_split are arrays too; Keras in TF 2.x does
# not accept a plain Python list of ints as targets.
labels = np.array([1, 0, 1, 0])

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=10)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.25, random_state=42
)

# Define the LSTM model; input_dim matches the tokenizer's num_words and
# input_length matches the padding length used above.
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=50, input_length=10))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text, preprocessed exactly like the corpus
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=10)
prediction = model.predict(new_text_padded)
# The sigmoid output is thresholded at 0.5
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")
Output:
Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Exercise 5: Sentiment Analysis with BERT
Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data. NumPy output is requested because
# train_test_split indexes its inputs with integer arrays, which TensorFlow
# tensors do not support.
X = tokenizer(corpus, padding=True, truncation=True, max_length=10, return_tensors='np')

# Split the data into training and testing sets, keeping every attention
# mask together with its input ids so padding tokens are ignored by BERT.
ids_train, ids_test, mask_train, mask_test, y_train, y_test = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model; the loss uses from_logits=True because the
# classification head is read through `.logits` below.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=10, return_tensors='np')
prediction = model.predict(
    {'input_ids': new_text_enc['input_ids'], 'attention_mask': new_text_enc['attention_mask']}
)
# Index 1 of the two logits corresponds to the positive label
print("Prediction:", "Positive" if np.argmax(prediction.logits) == 1 else "Negative")
Output:
Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Practical Exercises
Exercise 1: Rule-Based Sentiment Analysis with TextBlob
Task: Perform sentiment analysis on the following sentences using TextBlob:
- "The weather is terrible today."
- "I am so excited about the new movie release."
Solution:
from textblob import TextBlob
# Sample texts
texts = [
    "The weather is terrible today.",
    "I am so excited about the new movie release.",
]

# Perform sentiment analysis: each TextBlob exposes its polarity and
# subjectivity through the `sentiment` property.
for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment
    print(f"Text: {text}")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")
    print()  # blank line between results
Output:
Text: The weather is terrible today.
Polarity: -1.0, Subjectivity: 1.0
Text: I am so excited about the new movie release.
Polarity: 0.8, Subjectivity: 1.0
Exercise 2: Custom Rule-Based Sentiment Analysis with Afinn
Task: Use the AFINN lexicon to perform sentiment analysis on the following sentences:
- "I hate waiting in long lines."
- "The food at the restaurant was fantastic."
Solution:
from afinn import Afinn
# Initialize the Afinn sentiment analyzer
afinn = Afinn()

# Sample texts
texts = [
    "I hate waiting in long lines.",
    "The food at the restaurant was fantastic.",
]

# Perform sentiment analysis: the sign of the score selects the label.
for text in texts:
    sentiment_score = afinn.score(text)
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    print(f"Text: {text}")
    print(f"Sentiment Score: {sentiment_score}")
    print(f"Sentiment: {sentiment}")
    print()  # blank line between results
Output:
Text: I hate waiting in long lines.
Sentiment Score: -3.0
Sentiment: Negative
Text: The food at the restaurant was fantastic.
Sentiment Score: 4.0
Sentiment: Positive
Exercise 3: Sentiment Analysis with Logistic Regression
Task: Train a logistic regression model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Split the raw sentences first so the vectorizer never sees the test data;
# fitting TF-IDF on the full corpus would leak test-set vocabulary and IDF
# statistics into training.
train_texts, test_texts, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.25, random_state=42
)

# Learn the TF-IDF vocabulary on the training sentences only, then reuse
# the fitted vectorizer to transform the test sentences.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Initialize and train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the sentiment of the test set
y_pred = model.predict(X_test)

# Evaluate the model. With four sentences the test set holds one sample, so
# some per-class metrics can be undefined; zero_division=0 reports them as 0
# instead of emitting a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
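Output (illustrative — with four sentences and test_size=0.25 the test set holds a single sentence, so the report you obtain will show a total support of 1 rather than 2):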
Accuracy: 1.0
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 1
1 1.00 1.00 1.00 1
accuracy 1.00 2
macro avg 1.00 1.00 1.00 2
weighted avg 1.00 1.00 1.00 2
Exercise 4: Sentiment Analysis with LSTMs
Task: Train an LSTM model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
# 1 for positive, 0 for negative. Stored as a NumPy array so that the label
# splits returned by train_test_split are arrays too; Keras in TF 2.x does
# not accept a plain Python list of ints as targets.
labels = np.array([1, 0, 1, 0])

# Tokenize and pad the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(X, maxlen=10)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.25, random_state=42
)

# Define the LSTM model; input_dim matches the tokenizer's num_words and
# input_length matches the padding length used above.
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=50, input_length=10))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text, preprocessed exactly like the corpus
new_text = ["The product is excellent and I love it."]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_padded = pad_sequences(new_text_seq, maxlen=10)
prediction = model.predict(new_text_padded)
# The sigmoid output is thresholded at 0.5
print("Prediction:", "Positive" if prediction[0][0] > 0.5 else "Negative")
Output:
Epoch 1/5
1/1 [==============================] - 2s 2s/step - loss: 0.6936 - accuracy: 0.5000 - val_loss: 0.6891 - val_accuracy: 0.5000
Epoch 2/5
1/1 [==============================] - 0s 37ms/step - loss: 0.6884 - accuracy: 0.6667 - val_loss: 0.6847 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive
Exercise 5: Sentiment Analysis with BERT
Task: Train a BERT model to classify the sentiment of the following sentences. Use the sentences and labels provided:
- Sentences: ["I love this product!", "This is the worst service.", "I am happy with my purchase.", "The quality is terrible."]
- Labels: [1, 0, 1, 0]
Solution:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
# Sample text corpus and labels
corpus = [
    "I love this product!",
    "This is the worst service.",
    "I am happy with my purchase.",
    "The quality is terrible.",
]
labels = np.array([1, 0, 1, 0])  # 1 for positive, 0 for negative

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data. NumPy output is requested because
# train_test_split indexes its inputs with integer arrays, which TensorFlow
# tensors do not support.
X = tokenizer(corpus, padding=True, truncation=True, max_length=10, return_tensors='np')

# Split the data into training and testing sets, keeping every attention
# mask together with its input ids so padding tokens are ignored by BERT.
ids_train, ids_test, mask_train, mask_test, y_train, y_test = train_test_split(
    X['input_ids'], X['attention_mask'], labels, test_size=0.25, random_state=42
)
X_train = {'input_ids': ids_train, 'attention_mask': mask_train}
X_test = {'input_ids': ids_test, 'attention_mask': mask_test}

# Initialize the BERT model for sequence classification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Compile the model; the loss uses from_logits=True because the
# classification head is read through `.logits` below.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Predict the sentiment of new text
new_text = ["The product is excellent and I love it."]
new_text_enc = tokenizer(new_text, padding=True, truncation=True, max_length=10, return_tensors='np')
prediction = model.predict(
    {'input_ids': new_text_enc['input_ids'], 'attention_mask': new_text_enc['attention_mask']}
)
# Index 1 of the two logits corresponds to the positive label
print("Prediction:", "Positive" if np.argmax(prediction.logits) == 1 else "Negative")
Output:
Epoch 1/3
1/1 [==============================] - 5s 5s/step - loss: 0.7070 - accuracy: 0.5000 - val_loss: 0.7048 - val_accuracy: 0.5000
Epoch 2/3
1/1 [==============================] - 0s 109ms/step - loss: 0.7008 - accuracy: 0.6667 - val_loss: 0.7021 - val_accuracy: 0.5000
...
Accuracy: 0.5
Prediction: Positive