You're working for a well-known car manufacturer that is looking at integrating LLMs into its vehicles to provide guidance to drivers. You've been asked to experiment with integrating car manuals with an LLM to create a context-aware chatbot. The manufacturer hopes that this context-aware LLM can be hooked up to text-to-speech software to read the model's response aloud.
As a proof of concept, you'll integrate several pages from a car manual that list car warning messages along with their meanings and recommended actions. This particular manual, stored as an HTML file, mg-zs-warning-messages.html, is from an MG ZS, a compact SUV. Armed with your newfound knowledge of LLMs and LangChain, you'll implement Retrieval-Augmented Generation (RAG) to create the context-aware chatbot.
#Run this cell to install the necessary packages
import subprocess
import pkg_resources
def install_if_needed(package, version):
    """Install ``package`` at exactly ``version`` unless it is already present.

    Keeps the libraries used here at consistent versions to avoid errors.

    Args:
        package: Distribution name to check and, if necessary, install
            (for example ``"langchain-core"``).
        version: Exact version string required (for example ``"0.3.18"``).

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
    """
    # Local imports keep this helper self-contained.
    import sys
    from importlib import metadata

    try:
        installed = metadata.version(package)
    except metadata.PackageNotFoundError:
        installed = None

    if installed == version:
        return

    # Invoke pip through the running interpreter so the package is installed
    # into this environment rather than whichever "pip" is first on PATH.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", f"{package}=={version}"]
    )
# Exact package versions this notebook depends on, installed in the order listed.
_PINNED_PACKAGES = (
    ("langchain-core", "0.3.18"),
    ("langchain-openai", "0.2.8"),
    ("langchain-community", "0.3.7"),
    ("unstructured", "0.14.4"),
    ("langchain-chroma", "0.1.4"),
    ("langchain-text-splitters", "0.3.2"),
)
for _dist_name, _dist_version in _PINNED_PACKAGES:
    install_if_needed(_dist_name, _dist_version)
# Start coding here, use as many cells as you like
import os
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableMap
from langchain_core.output_parsers import StrOutputParser
# Read the OpenAI API key from the environment (None when the variable is unset).
openai_api_key = os.getenv("OPENAI_API_KEY")

# Load the car-manual warning-message pages from the HTML file.
loader = UnstructuredHTMLLoader(file_path="data/mg-zs-warning-messages.html")
car_docs = loader.load()

# Split the loaded documents into overlapping chunks.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = splitter.split_documents(car_docs)

# Embed the chunks and index them in a Chroma vector store.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = Chroma.from_documents(chunks, embedding=embedding_model)

# Similarity search configured with k=3 results per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt with two slots: the retrieved manual text and the driver's question.
prompt_template = ChatPromptTemplate.from_template("""
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say you don't know.
Context: {context}
Question: {question}
""")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=openai_api_key)

# The incoming question is passed through unchanged and is also sent to the
# retriever. The retrieved documents are flattened to plain text first;
# otherwise the prompt would receive the repr of a list of Document objects
# (metadata and escaped newlines included) instead of the manual's wording.
rag_chain = (
    {"question": RunnablePassthrough(), "context": retriever | _format_docs}
    | prompt_template
    | llm
    | StrOutputParser()
)

# Example question about one of the warning messages.
prompt = """
The Gasoline Particular Filter Full warning has appeared. What does this mean and what should I do about it?
"""
answer = rag_chain.invoke(prompt)
print(answer)