Project: Analyzing Car Reviews with LLMs

"Car-ing is sharing", an auto dealership company for car sales and rental, is taking its services to the next level thanks to Large Language Models (LLMs).

As their newly recruited AI and NLP developer, you've been asked to prototype a chatbot app with multiple functionalities that not only assist customers but also provide support to human agents in the company.

The solution should receive textual prompts and use a variety of pre-trained Hugging Face LLMs to respond to a series of tasks, e.g. classifying the sentiment in a car’s text review, answering a customer question, summarizing or translating text, etc.

!pip install transformers
!pip install evaluate==0.4.0
!pip install datasets==2.10.0
!pip install sentencepiece==0.1.97
!pip install xformers
from transformers import logging
# Set the transformers logging threshold to WARNING for the rest of the notebook.
logging.set_verbosity(logging.WARNING)

Hidden output

import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize
from transformers import pipeline, logging
import evaluate
from sklearn.metrics import accuracy_score, f1_score

# sent_tokenize() needs the Punkt sentence models. Newer NLTK releases look
# them up under 'punkt_tab', older ones under 'punkt'; NLTK is not pinned in
# this notebook, so fetch both. With the default download() settings an
# unknown resource name is reported as a failed download (returns False)
# rather than raising, so the extra name is harmless on old versions.
for _punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(_punkt_resource, quiet=True)

# Load the semicolon-delimited review file. 'utf-8-sig' is used so that a
# leading byte-order mark, if present, is dropped while decoding.
df = pd.read_csv('data/car_reviews.csv', delimiter=';', quotechar='"', encoding='utf-8-sig')
df.columns = ['review', 'class']

# Encode the sentiment labels as integers (positive -> 1, negative -> 0).
# Series.map() turns every label missing from the mapping into NaN; left
# unchecked, that only surfaces later inside the metric functions with an
# unrelated-looking error, so fail here and name the offending values.
_normalized_labels = df['class'].str.strip().str.lower()
df['class'] = _normalized_labels.map({'positive': 1, 'negative': 0})
_unmapped_rows = df['class'].isna()
if _unmapped_rows.any():
    _bad_labels = sorted(set(_normalized_labels[_unmapped_rows].astype(str)))
    raise ValueError(
        f"Unexpected sentiment labels in data/car_reviews.csv: {_bad_labels}"
    )

# Task 1: classify every review with a pre-trained SST-2 sentiment model.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
predicted_labels = sentiment_analyzer(
    df['review'].tolist(),
    truncation=True,
    batch_size=4,
)
# Convert the pipeline's string labels to the same 1/0 encoding as df['class'].
predictions = [int(res['label'] == 'POSITIVE') for res in predicted_labels]

# Score the predictions against the encoded ground-truth labels.
accuracy_result = accuracy_score(df['class'], predictions)
f1_result = f1_score(df['class'], predictions)

# Task 2: translate the first two sentences of the first review into Spanish.
first_review = df['review'].iloc[0]
sentences = sent_tokenize(first_review)
# Fall back to the whole review when it has fewer than two sentences.
if len(sentences) < 2:
    first_two_sentences = first_review
else:
    first_two_sentences = ' '.join(sentences[:2])

translator = pipeline(
    "translation_en_to_es",
    model="Helsinki-NLP/opus-mt-en-es",
)
translation_result = translator(
    first_two_sentences,
    truncation=True,
    max_length=512,
)
translated_review = translation_result[0]['translation_text']

# Read the reference translations, one per line. Blank lines are skipped:
# they would otherwise be handed to BLEU as empty references, which can
# distort the reference length used by the metric.
with open('data/reference_translations.txt', 'r', encoding='utf-8') as f:
    reference_translations = [ref for ref in (line.strip() for line in f) if ref]

# A single prediction is scored against all references at once, hence the
# nested list passed as `references`.
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(
    predictions=[translated_review],
    references=[reference_translations],
)

# Task 3: extractive question answering over the second review.
question = "What did he like about the brand?"
context = df['review'].iloc[1]

qa_model = pipeline(
    "question-answering",
    model="deepset/minilm-uncased-squad2",
)
qa_result = qa_model(question=question, context=context, truncation=True)

# Keep the extracted span only when the model's score clears 0.01.
if qa_result['score'] > 0.01:
    answer = qa_result['answer']
else:
    answer = "No relevant answer found"

# Task 4: summarize the last review.
last_review = df['review'].iloc[-1]
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Target length is 30% of the review's word count, clamped to [40, 55].
word_count = len(last_review.split())
max_len = int(word_count * 0.3)
if max_len < 40:
    max_len = 40
elif max_len > 55:
    max_len = 55

summary_result = summarizer(
    last_review,
    max_length=max_len,
    min_length=int(max_len * 0.7),
    truncation=True,
)
summarized_text = summary_result[0]['summary_text']

# Print the results of all four tasks as one report; each section is a
# separate argument, joined by newlines.
print(
    "=== Final Results ===",
    f"1. Sentiment Analysis\n   Accuracy: {accuracy_result:.4f}\n   F1 Score: {f1_result:.4f}",
    f"\n2. Translation\n   BLEU Scores: {bleu_score}",
    f"\n3. QA Answer\n   '{answer}'",
    f"\n4. Summarization\n   '{summarized_text}'",
    sep="\n",
)

Work in progress: packaging the solution as a FastAPI server with a specific endpoint for each of those tasks.

!pip install fastapi
!pip install uvicorn
!pip install pydantic

!pip install nest_asyncio

import nest_asyncio
import uvicorn
# Apply the nest_asyncio patch before anything server-related is started.
# NOTE(review): presumably here because the notebook kernel already runs an
# event loop; the server in this notebook is launched in a separate process,
# so confirm whether this patch is still required.
nest_asyncio.apply()

# Now you can run uvicorn
import uvicorn
from multiprocessing import Process
import time

# Run uvicorn in a child process so this cell returns instead of blocking on
# the server loop. "app:app" is passed to uvicorn as the application to serve;
# the `app` module itself is not part of this notebook section.
server_process = Process(
    target=uvicorn.run,
    kwargs={"app": "app:app", "host": "0.0.0.0", "port": 8000},
)
server_process.start()

# Give the child a moment to start, then check it is still alive before
# announcing the server: a failed import or an occupied port makes the child
# exit almost immediately, and the success message would be misleading.
time.sleep(2)
if server_process.is_alive():
    print("Server should be running at http://localhost:8000")
else:
    print(f"Server process exited during startup (exit code {server_process.exitcode})")

# Stop it with:
# server_process.terminate()