Project: Analyzing Car Reviews with LLMs

Car-ing is sharing, an auto dealership company for car sales and rentals, is taking its services to the next level thanks to Large Language Models (LLMs).

As their newly recruited AI and NLP developer, you've been asked to prototype a chatbot app with multiple functionalities that not only assist customers but also provide support to human agents in the company.

The solution should receive textual prompts and use a variety of pre-trained Hugging Face LLMs to carry out a series of tasks, such as classifying the sentiment of a car review, answering a customer question, and summarizing or translating text.

# Import necessary packages
import pandas as pd
import torch

# Keep transformers quiet: only messages at WARNING level or above are logged.
from transformers import logging

logging.set_verbosity_warning()

# Load the car reviews; the CSV uses ";" as its field separator.
df = pd.read_csv("data/car_reviews.csv", sep=";")
df.head()  # preview expression; its return value is not used here

# Extract the review texts and their sentiment classes as plain Python lists.
reviews, labels = (df[column].tolist() for column in ("Review", "Class"))

# Sentiment Classification
# Wrap a pre-trained sentiment model in a Hugging Face pipeline
from transformers import pipeline

classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Classify all reviews in a single call, then show each prediction next to
# the label that came with the dataset.
predicted_labels = classifier(reviews)
for text, actual, result in zip(reviews, labels, predicted_labels):
    report = (
        f"Review: {text}\n"
        f"Actual Sentiment: {actual}\n"
        f"Predicted Sentiment: {result['label']}  (Confidence: {result['score']:.4f})\n"
    )
    print(report)

# Accuracy and F1 score metrics
import evaluate

acc, f1 = evaluate.load("accuracy"), evaluate.load("f1")

# Encode sentiments as integers: 1 for "POSITIVE", 0 for anything else.
references = [int(actual == "POSITIVE") for actual in labels]
predictions = [int(result["label"] == "POSITIVE") for result in predicted_labels]

# Score the model predictions against the dataset labels.
acc_results_dict = acc.compute(references=references, predictions=predictions)
f1_result_dict = f1.compute(references=references, predictions=predictions)
accuracy_result = acc_results_dict["accuracy"]
f1_result = f1_result_dict["f1"]

print(f"Accuracy: {accuracy_result}")
print(f"F1: {f1_result}")

# Translation
# English-to-Spanish translation pipeline, applied to the first car review
from transformers import pipeline
import evaluate

# `reviews` is the list of review texts loaded from the dataset above.
first_review = reviews[0]
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-en-es")

# max_length=30 bounds the length of the generated translation.
translation_outputs = translator(first_review, max_length=30)
translated_review = translation_outputs[0]["translation_text"]
print(f"Model Translation: \n{translated_review}")

# Reference translations from file
# Decode explicitly as UTF-8 (assumes the file is UTF-8 encoded): the
# references are Spanish text, and relying on the platform default encoding
# can garble accented characters.
with open("data/reference_translations.txt", encoding="utf-8") as file:
    lines = file.readlines()

# One reference translation per line. Blank lines are dropped so they are not
# passed to BLEU as empty reference translations.
stripped_lines = (line.strip() for line in lines)
references = [text for text in stripped_lines if text]
print(f"Spanish translation Ref: \n{references}")

# BLEU
bleu = evaluate.load("bleu")
# compute() takes a list of predictions and, for each prediction, a list of
# acceptable references. There is a single prediction here, so all reference
# lines are wrapped together in one outer list.
bleu_score = bleu.compute(predictions=[translated_review], references=[references])
print(bleu_score['bleu'])

# Extractive QA
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Instantiating Model & Tokenizer
model_Q = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_Q)
model = AutoModelForQuestionAnswering.from_pretrained(model_Q)

# Context & Question - Tokenization
context = reviews[1]
print(f"Context:\n{context}")
question = "What did he like about the brand?"
# truncation="only_second" trims only the context (the second sequence) when
# the question/context pair exceeds the tokenizer's configured maximum length,
# instead of letting an over-long input fail at inference time. The question
# itself is never cut.
inputs = tokenizer(
    question,
    context,
    truncation="only_second",
    return_tensors="pt",
)

# Inference and answer extraction from the raw outputs
with torch.no_grad():
    outputs = model(**inputs)

# A single (question, context) pair was encoded, so use row 0 of the batch.
start_logits = outputs.start_logits[0]
end_logits = outputs.end_logits[0]

start_idx = torch.argmax(start_logits)
# Look for the best end position only from the start position onwards.
# Taking the argmax of the end logits independently can select an end that
# precedes the start, which would silently produce an empty answer span.
end_idx = start_idx + torch.argmax(end_logits[start_idx:]) + 1
answer_span = inputs["input_ids"][0][start_idx:end_idx]

# Decode and show answer
answer = tokenizer.decode(answer_span)
print("Answer: ", answer)

# Summarize Car Reviews: the last review in the dataset is the input text
text_to_summarize = reviews[-1]
print(f"Original Text:\n{text_to_summarize}")

# Summarization pipeline for inference; max_length=55 bounds the summary length
summary_model = "cnicu/t5-small-booksum"
summarizer = pipeline(task="summarization", model=summary_model)
outputs = summarizer(text_to_summarize, max_length=55)
first_summary = outputs[0]
summarized_text = first_summary["summary_text"]
print(f"Summarized Text:\n{summarized_text}")