Project: Analyzing Car Reviews with LLMs

Car-ing is sharing, an auto dealership company for car sales and rental, is taking their services to the next level thanks to Large Language Models (LLMs).

As their newly recruited AI and NLP developer, you've been asked to prototype a chatbot app with multiple functionalities that not only assist customers but also provide support to human agents in the company.

The solution should receive textual prompts and use a variety of pre-trained Hugging Face LLMs to respond to a series of tasks, e.g. classifying the sentiment in a car’s text review, answering a customer question, summarizing or translating text, etc.

Before you start

In order to complete the project you may wish to install some Hugging Face libraries such as transformers and evaluate.

!pip install transformers
!pip install evaluate==0.4.0
!pip install datasets==2.10.0
!pip install sentencepiece==0.1.97

from transformers import logging
logging.set_verbosity(logging.WARNING)

# Start your code here!
import pandas as pd
import evaluate
from transformers import pipeline

'''
Use a pre-trained LLM to classify the sentiment of the five car reviews in the car_reviews.csv dataset, and evaluate the classification accuracy and F1 score of predictions.
'''
# Load the sentiment-analysis pipeline. The checkpoint is named explicitly so
# the run is reproducible and does not depend on the library's task default
# (transformers emits a warning when no model is supplied).
sentiment_analysis = pipeline(
    task='sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

# Read the semicolon-separated reviews file
csv_file_path = 'data/car_reviews.csv'
data = pd.read_csv(csv_file_path, sep=';')

# Metric objects used later to score the predictions
accuracy = evaluate.load('accuracy')
f1 = evaluate.load('f1')

# Run the classifier once per review. Each call's result is indexed with [0]
# below to reach the result for that review, so predicted_labels is a list
# with one such per-review result list for every row of the dataset.
predicted_labels = [sentiment_analysis(review) for review in data['Review']]
predicted_labels

for label in predicted_labels:
    print(label[0])

'''
Store the model outputs in predicted_labels, then extract the labels and map them onto a list of {0,1} integer binary labels called predictions
'''
# Reuse the predicted_labels computed earlier instead of running the model a
# second time over every review: the inputs are unchanged, so repeating the
# inference only costs time.
label_map = {'NEGATIVE': 0, 'POSITIVE': 1}

# Ground-truth classes from the dataset as 0/1 integers
real = [label_map[row] for row in data['Class']]

# Each entry of predicted_labels wraps one result dict in a list, hence row[0]
predictions = [label_map[row[0]['label']] for row in predicted_labels]

predictions

real

'''
Store the calculated metrics in accuracy_result and f1_result
'''
# Score the 0/1 predictions against the 0/1 ground truth with each metric in
# turn; every result is whatever the metric's compute() call returns.
accuracy_result, f1_result = (
    metric.compute(predictions=predictions, references=real)
    for metric in (accuracy, f1)
)

'''
The company has recently started attracting customers from Spain.
Extract and pass the first two sentences of the first review in the dataset to an English-to-Spanish translation LLM. Calculate the BLEU score to assess translation quality, 
using the content in reference_translations.txt as references.
'''
import re

# English-to-Spanish translation pipeline
translator = pipeline(task='translation', model='Helsinki-NLP/opus-mt-tc-big-en-es')

# Load the reference translations, one per line. The file holds Spanish text,
# so it is decoded explicitly as UTF-8 instead of the platform default, and
# blank lines are skipped so they do not turn into empty references.
with open('data/reference_translations.txt', encoding='utf-8') as file:
    references = [line.strip() for line in file if line.strip()]

# Take the first two sentences of the first review. Splitting on the
# whitespace that follows '.', '!' or '?' keeps each sentence's closing
# punctuation; a plain split('.') would drop it.
first_review = data['Review'][0]
first_two_sentences = re.split(r'(?<=[.!?])\s+', first_review.strip())[:2]

# Store the translated text generated by the LLM in translated_review.
# Both sentences are translated as a single passage so that one prediction
# is scored against the references.
translated_review = translator(' '.join(first_two_sentences))[0]['translation_text']

# Store the BLEU score metric result in bleu_score.
# NOTE(review): this assumes every line of the reference file is an
# alternative translation of the same two-sentence passage (which is how the
# file is consumed elsewhere in this notebook) - confirm against the data.
# Under that assumption the lines form the reference set of the single
# prediction, rather than being paired one-to-one with separate sentences.
bleu = evaluate.load('bleu')
bleu_score = bleu.compute(predictions=[translated_review], references=[references])

'''
The 2nd review in the dataset emphasizes brand aspects. Load an extractive QA LLM such as "deepset/minilm-uncased-squad2" to formulate the question "What did he like about the brand?" and obtain an answer.
Use the variable names question and context for the two LLM inputs.
Store the actual text answer in answer.
'''

# Extractive question-answering pipeline backed by the suggested checkpoint
qa_model = pipeline(model="deepset/minilm-uncased-squad2", task='question-answering')

# The review at index 1 serves as the context, prefixed with a short framing
# sentence
second_review = data['Review'][1]
context = f"Here is a car review from our customer: {second_review}"
question = "What did he like about the brand?"

# Only the 'answer' field of the pipeline result is kept
qa_result = qa_model(context=context, question=question)
answer = qa_result['answer']

'''
Summarize the last review in the dataset into approximately 50-55 tokens. Store it in the variable summarized_text.
'''
# Summarization pipeline; no checkpoint is named, so the library's default
# model for the task is used
summarizer = pipeline(task='summarization')

# Take the last review by position instead of the hard-coded label 4, so the
# code keeps working if the dataset grows or shrinks
last_review = data['Review'].iloc[-1]

# Request a summary of roughly 50-55 tokens, as the task asks. The previous
# max_length=60 contradicted both the task and its own comment, and left the
# lower bound to the model's default.
summarized_text = summarizer(last_review, min_length=50, max_length=55)[0]['summary_text']

import pandas as pd
import torch

# Read the semicolon-delimited car reviews file into a DataFrame
file_path = "data/car_reviews.csv"
df = pd.read_csv(file_path, sep=";")

# Pull the review texts and their sentiment classes out as two plain lists
reviews, real_labels = (df[column].tolist() for column in ('Review', 'Class'))


# Instruction 1: sentiment classification

# Build the sentiment classifier pipeline from a named checkpoint
from transformers import pipeline
classifier = pipeline(task='sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

# Classify all reviews in one call, then print each review next to its actual
# and predicted sentiment
predicted_labels = classifier(reviews)
for review, prediction, label in zip(reviews, predicted_labels, real_labels):
    print(
        f"Review: {review}\n"
        f"Actual Sentiment: {label}\n"
        f"Predicted Sentiment: {prediction['label']} (Confidence: {prediction['score']:.4f})\n"
    )

# Metric objects for accuracy and F1
import evaluate
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

# Encode sentiments as integers: "POSITIVE" becomes 1, anything else 0
references = [int(label == "POSITIVE") for label in real_labels]
predictions = [int(result['label'] == "POSITIVE") for result in predicted_labels]

# Compute both metrics, then unwrap the scalar from each result dict
accuracy_result_dict = accuracy.compute(predictions=predictions, references=references)
f1_result_dict = f1.compute(predictions=predictions, references=references)
accuracy_result = accuracy_result_dict['accuracy']
f1_result = f1_result_dict['f1']
print(f"Accuracy: {accuracy_result}")
print(f"F1 result: {f1_result}")


# Instruction 2: Translation

# Load translation LLM into a pipeline and translate car review
first_review = reviews[0]
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
# NOTE(review): max_length=27 limits the generated translation; presumably it
# was chosen so the output covers about the first two sentences - confirm.
translated_review = translator(first_review, max_length=27)[0]['translation_text']
print(f"Model translation:\n{translated_review}")

# Load reference translations from file. The file holds Spanish text, so it is
# decoded explicitly as UTF-8 instead of the platform default, and blank lines
# are skipped so they do not turn into empty references.
with open("data/reference_translations.txt", 'r', encoding="utf-8") as file:
    references = [line.strip() for line in file if line.strip()]
print(f"Spanish translation references:\n{references}")

# Load and calculate BLEU score metric: the single prediction is scored
# against all reference lines taken together as its reference set
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=[translated_review], references=[references])
print(bleu_score['bleu'])


# Instruction 3: extractive QA

# Auto classes for manual QA inference (a pipeline would work as well)
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load the QA model and its matching tokenizer from the same checkpoint
model_ckp = "deepset/minilm-uncased-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_ckp)
tokenizer = AutoTokenizer.from_pretrained(model_ckp)

# The second review is the context; encode question and context together
question = "What did he like about the brand?"
context = reviews[1]
print(f"Context:\n{context}")
inputs = tokenizer(question, context, return_tensors="pt")

# Forward pass without gradient tracking
with torch.no_grad():
    outputs = model(**inputs)

# The highest-scoring start and end positions delimit the answer; +1 makes the
# end position inclusive when slicing
start_idx = outputs.start_logits.argmax()
end_idx = outputs.end_logits.argmax() + 1
answer_span = inputs["input_ids"][0][start_idx:end_idx]

# Turn the selected token ids back into text and show it
answer = tokenizer.decode(answer_span)
print("Answer: ", answer)


# Instruction 4

# Checkpoint used for summarization
model_name = "cnicu/t5-small-booksum"

# The last review in the list is the text to condense
text_to_summarize = reviews[-1]
print(f"Original text:\n{text_to_summarize}")

# Build the pipeline, run it with a 53-token cap, and keep the summary text of
# the first result
summarizer = pipeline(task="summarization", model=model_name)
outputs = summarizer(text_to_summarize, max_length=53)
summarized_text = outputs[0]['summary_text']
print(f"Summarized text:\n{summarized_text}")

Project: Analyzing Car Reviews with LLMs

Before you start

Before you start