# Project: Classifying Emails using Llama
# Run the following cells first
# Install necessary packages
!pip install llama-cpp-python==0.2.82 -q -q -q# Download the model
!wget -q https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf?download=true -O model.gguf# Import required libraries
import pandas as pd
from llama_cpp import Llama# Load the email dataset
emails_df = pd.read_csv('data/email_categories_data.csv')
# Display the first few rows of our dataset
print("Preview of our email dataset:")
emails_df.head(2)# Loading and setting up the Llama model
llm = Llama(model_path = "model.gguf")
# 2 Designing the prompt
# Create an effective prompt that guides the model's behavior for both email
# classification and conversation.
#
# Task: create a variable `prompt` containing the prompt asking to classify
# the emails saved in the data/email_categories_data.csv dataset into the
# three categories ("Priority", "Updates", or "Promotions").
#
# Layout: one instruction line, three worked "Q: ... / Response: ..." examples
# (one per category), then an open "Q:" that the email to classify is
# appended to.
prompt = (
    "You classify emails into categories: Priority, Updates, or Promotions.\n"
    # Example 1: subject line + body -> Priority
    "Q: Important - Billing Issue\n"
    "Your payment failed. Please update your billing details immediately.\n"
    "Response: Priority\n"
    # Example 2: subject line + body -> Promotions
    "Q: 50% Off Spring Collection!\n"
    "Don't miss our biggest sale of the season! All spring items half off. Limited time offer.\n"
    "Response: Promotions\n"
    # Example 3: subject line only -> Updates
    "Q: Canceled Event - Team Meeting\n"
    "Response: Updates\n"
    # Open slot for the email being classified
    "Q:"
)
#3 Processing messages
# Implement the main function that processes messages and returns appropriate responses.
# Implementing e-mail classification
# Processing the messages
def process_message(llm: "Llama", message: str, prompt: str) -> str:
    """Classify one email by letting the LLM complete the few-shot prompt.

    Args:
        llm: The model to query. It is called with the full prompt text and
            the keyword arguments ``max_tokens``, ``temperature`` and
            ``stop``, and must return a mapping with a ``choices`` list.
        message: Text of the email to classify; it is appended to ``prompt``
            separated by a single space.
        prompt: Instruction text and worked examples that precede the email.

    Returns:
        The ``text`` of the first completion choice with surrounding
        whitespace stripped.
    """
    # The email has to be part of the text sent to the model; sending only
    # ``prompt`` would produce the same answer for every email.
    input_prompt = f"{prompt} {message}"

    # Generation is capped at 5 tokens and stops at a newline or at the start
    # of the next "Q:" example.
    output = llm(input_prompt, max_tokens=5, temperature=0.0, stop=["Q:", "\n"])
    print(output)  # raw response, kept as a debugging aid

    # Return the LLM response - extract the generated content.
    return output["choices"][0]["text"].strip()


# 4 Testing the model
"""
Test the model on the first two emails from the dataset, storing the corresponding classification results as strings in two variables named result1 and result2.
"""
# Extract a subset of test emails from the dataset, classify them using the
# Llama model, and save the results.

# Extracting test emails
# Use .head() to select the first two rows from the dataset for testing
test_emails = emails_df.head(2)

# Classifying emails
# Use the function defined above to extract the classification category
# given the model, email content, and prompt.
predictions = []
for email_content, expected_category in zip(
    test_emails["email_content"], test_emails["expected_category"]
):
    category = process_message(llm, email_content, prompt)
    print(f"expected {expected_category} predicted {category}")
    predictions.append(category)

# Store the results in the corresponding variables.
# Assignment is by position rather than by index label, so it also works when
# the DataFrame index is not 0, 1, ...; missing rows leave the empty-string
# default in place.
result1, result2 = (predictions + ["", ""])[:2]

print(f"result1 {result1} result2 {result2}")