Project: Classifying Emails using Llama

1 hidden cell

# Run the following cells first
# Install necessary packages
!pip install llama-cpp-python==0.2.82 -q -q -q

# Download the model
!wget -q https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf?download=true -O model.gguf

# Import required libraries
import pandas as pd
from llama_cpp import Llama

# Read the e-mail dataset from its CSV file into a DataFrame
dataset_path = 'data/email_categories_data.csv'
emails_df = pd.read_csv(dataset_path)

# Announce the preview, then evaluate the first two rows
# (in a notebook the bare expression is rendered only when it is the
# last expression of its cell)
print("Preview of our email dataset:")
emails_df.head(2)

# Create the Llama model object from the local model file
model_file = "model.gguf"
llm = Llama(model_path=model_file)

# 2 Designing the prompt
# Few-shot prompt asking the model to classify the e-mails stored in
# data/email_categories_data.csv into one of three categories
# ("Priority", "Updates", or "Promotions").
# Each example is a "Q:" entry followed by a "Response:" line; the prompt
# ends with an open "Q:" so the e-mail to classify can be appended to it.
prompt = "\n".join([
  "You classify emails into categories: Priority, Updates, or Promotions.",
  "Q: Important - Billing Issue",
  "Your payment failed. Please update your billing details immediately.",
  "Response: Priority",
  "Q: 50% Off Spring Collection!",
  "Don't miss our biggest sale of the season! All spring items half off. Limited time offer.",
  "Response: Promotions",
  "Q: Canceled Event - Team Meeting",
  "Response: Updates",
  "Q:",
])

# 3 Processing messages

# Implement the main function that processes messages and returns appropriate responses.

# Implementing e-mail classification

# Processing the messages
def process_message(llm: "Llama", message, prompt):
  """Classify one e-mail by letting the LLM complete the prompt.

  Args:
    llm: The model, called as
      ``llm(text, max_tokens=..., temperature=..., stop=...)``.
    message: Text of the e-mail to classify.
    prompt: Prompt prefix; ``message`` is appended to it after one space.

  Returns:
    The ``text`` of the first completion choice, with leading and
    trailing whitespace stripped.
  """
  # The e-mail has to be part of the text sent to the model; sending only
  # the bare prompt would produce the same answer for every message.
  input_prompt = f"{prompt} {message}"

  # Request at most 5 tokens at temperature 0.0, and stop generating at
  # the next "Q:" marker or newline.
  output = llm(input_prompt, max_tokens=5, temperature=0.0, stop=["Q:", "\n"])
  # Show the raw response for inspection
  print(output)

  # Extract the generated content from the LLM response
  result = output['choices'][0]['text'].strip()
  return result

# 4 Testing the model
# Classify the first two e-mails of the dataset and keep each predicted
# category as a string in result1 and result2.

# .head(2) keeps only the first two rows of the dataset
test_emails = emails_df.head(2)

# Both results stay empty strings until a prediction is stored in them
result1, result2 = "", ""

for label, email in test_emails.iterrows():
  # Read the e-mail text and the row's expected_category value
  # (the expected category is read but not used further in this block)
  text = email.email_content
  expected_category = email.expected_category

  # Ask the model for the category of this e-mail
  prediction = process_message(llm, text, prompt)

  # Row labels 0 and 1 fill result1 and result2; other labels are ignored
  if label == 0:
    result1 = prediction
    continue
  if label == 1:
    result2 = prediction

print(f"result1 {result1} result2 {result2}")