Project: Analyzing Car Reviews with LLMs

Car-ing is sharing, an auto dealership company for car sales and rentals, is taking its services to the next level thanks to Large Language Models (LLMs).

As their newly recruited AI and NLP developer, you've been asked to prototype a chatbot app with multiple functionalities that not only assist customers but also provide support to human agents in the company.

The solution should receive textual prompts and use a variety of pre-trained Hugging Face LLMs to perform a series of tasks, e.g. classifying the sentiment of a car review, answering a customer question, and summarizing or translating text.

# IMPORTS
import pandas as pd
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    pipeline
)
import evaluate
from transformers import logging
logging.set_verbosity(logging.WARNING)

# IMPORTS
import pandas as pd
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    pipeline
)
import evaluate
from transformers import logging
logging.set_verbosity(logging.WARNING)

# LOAD DATA
# The reviews file is read as semicolon-separated; the "Review" and "Class"
# columns are pulled out as plain Python lists for the sections below.
car_reviews = pd.read_csv("data/car_reviews.csv", sep=";")
review_texts, review_class = (
    car_reviews[column].tolist() for column in ("Review", "Class")
)

# ------------------------------
# SENTIMENT CLASSIFICATION + METRICS
# ------------------------------
# Classify every review with the SST-2 fine-tuned DistilBERT checkpoint.
classifier = pipeline(
    task='sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)
predicted_labels = classifier(review_texts)

# Print each review next to its labelled and predicted sentiment.
for review, prediction, label in zip(review_texts, predicted_labels, review_class):
    print(
        f"Review: {review}\n"
        f"Actual Sentiment: {label}\n"
        f"Predicted Sentiment: {prediction['label']} "
        f"(Confidence: {prediction['score']:.4f})\n"
    )

# Encode both label sets as integers: 1 for "POSITIVE", 0 for anything else.
references = [int(actual == "POSITIVE") for actual in review_class]
predictions = [int(result["label"] == "POSITIVE") for result in predicted_labels]

accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

# Both metrics are computed from the same reference/prediction pair.
metric_inputs = {"references": references, "predictions": predictions}
accuracy_result = accuracy.compute(**metric_inputs)["accuracy"]
f1_result = f1.compute(**metric_inputs)["f1"]

print("Accuracy:", accuracy_result)
print("F1 Score:", f1_result)

# ------------------------------
# TRANSLATION + BLEU EVALUATION
# ------------------------------
# Extract the first 2 sentences from the first review.
first_review = review_texts[0]
sentences = first_review.split(".")
# Fragments after a "." keep their leading space and a trailing "." leaves an
# empty fragment, so strip each piece and drop the empty ones before re-joining;
# otherwise the result contains doubled spaces or a doubled full stop.
leading_sentences = [
    fragment.strip() for fragment in sentences if fragment.strip()
][:2]
review_text = ". ".join(leading_sentences) + "."

# Read the first two lines of the reference file as the reference translations.
# The encoding is pinned so decoding does not depend on the platform locale
# (assumes the file is stored as UTF-8 -- confirm against the data set).
with open("data/reference_translations.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()
references = [line.strip() for line in lines[:2]]

# Translate the two-sentence excerpt (not the whole review) so the prediction
# covers the same text as the references it is scored against.
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-en-es")
translated_review = translator(review_text, max_length=27)[0]["translation_text"]

# BLEU takes one list of acceptable references per prediction.
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=[translated_review], references=[references])
print("BLEU Score:", bleu_score["bleu"])

# ------------------------------
# EXTRACTIVE QUESTION ANSWERING
# ------------------------------
# Load the tokenizer and the question-answering model from the same checkpoint.
model_ckp = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_ckp)
model = AutoModelForQuestionAnswering.from_pretrained(model_ckp)

# The second review is the context; encode it together with the question.
context = review_texts[1]
print(f"Context:\n{context}")
question = "What did he like about the brand?"
inputs = tokenizer(question, context, return_tensors="pt")

# Forward pass without gradient tracking (inference only).
with torch.no_grad():
    outputs = model(**inputs)

# The answer runs from the highest-scoring start token through the
# highest-scoring end token; +1 makes the slice include that end token.
start_idx = outputs.start_logits.argmax()
end_idx = outputs.end_logits.argmax() + 1
answer_span = inputs["input_ids"][0, start_idx:end_idx]

# Turn the selected token ids back into text and display it.
answer = tokenizer.decode(answer_span)
print("Answer: ", answer)

# ------------------------------
# SUMMARIZATION + TOXICITY + REGARD
# ------------------------------
# Summarize the last review in the data set.
summarizer = pipeline(task="summarization", model="cnicu/t5-small-booksum")
text_to_summarize = review_texts[-1]
summary_output = summarizer(text_to_summarize, max_length=53)
summarized_text = summary_output[0]["summary_text"]
print("Summary:", summarized_text)

# Toxicity: score the summary, requesting the "maximum" aggregation.
toxicity = evaluate.load("toxicity")
toxicity_result = toxicity.compute(
    predictions=[summarized_text],
    aggregation="maximum",
)
print("Toxicity:", toxicity_result["max_toxicity"])

# Regard: print one line per label/score entry returned for the summary.
regard = evaluate.load("regard")
regard_result = regard.compute(data=[summarized_text])
for entry in regard_result["regard"][0]:
    regard_label = entry["label"].capitalize()
    regard_score = entry["score"]
    print(f"{regard_label} regard: {regard_score:.2f}")