# Haystack AI Tutorial
# Install the tutorial's dependencies (quiet mode, upgrading anything already installed).
# NOTE(review): "agentst" in the extras bracket looks like a typo — confirm against the
# extras that haystack-ai actually publishes.
!pip install -qU \
"haystack-ai[agentst]" \
tavily-python \
openai \
more_itertools
# (notebook cell output hidden)
from haystack.dataclasses import Document
from typing import List, Any, Dict
# Private knowledge base: one Document per fact about Islamabad.
docs: List[Document] = [
    Document(content=fact)
    for fact in (
        "Islamabad experiences a humid subtropical climate with hot summers and mild winters.",
        "Peak tourist season in Islamabad is during spring (March to May) and autumn (September to November) due to pleasant weather.",
        "Faisal Mosque, one of the largest mosques in the world, is an iconic landmark in Islamabad designed by Turkish architect Vedat Dalokay.",
        "Islamabad was purpose-built as the capital of Pakistan in the 1960s, designed by Greek architect Constantinos Apostolos Doxiadis.",
        "The city is known for its well-planned infrastructure, wide roads, and green spaces, making it one of the most organized cities in Pakistan.",
    )
]
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import OpenAIDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack import Pipeline, component
# In-memory store configured to compare embeddings with cosine similarity.
document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

# Indexing flow: embed every document, then hand the result to the writer,
# which persists it into `document_store`.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(
    name="embedder",
    instance=OpenAIDocumentEmbedder(model="text-embedding-3-small"),
)
indexing_pipeline.add_component(
    name="writer",
    instance=DocumentWriter(document_store=document_store),
)
indexing_pipeline.connect(sender="embedder", receiver="writer")

# Run once over the seed documents defined above.
indexing_pipeline.run(data={"embedder": {"documents": docs}})
from haystack.tools import ComponentTool
from haystack.components.embedders import OpenAIDocumentEmbedder, OpenAITextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
@component
class RagSearcher:
    """Query -> top-k docs from the private store.

    Embeds the query text with an OpenAI text embedder and looks up the
    closest documents through an in-memory embedding retriever.
    """

    def __init__(self, document_store, top_k: int = 3):
        """
        Build the embedder/retriever pair used by `run`.

        :param document_store: Store handed to the in-memory embedding retriever.
        :param top_k: Forwarded to the retriever as its ``top_k`` setting.
        """
        # Same embedding model as the indexing pipeline, so query and document
        # vectors live in the same space.
        self.text_embedder = OpenAITextEmbedder(model="text-embedding-3-small")
        self.retriever = InMemoryEmbeddingRetriever(
            document_store=document_store,
            top_k=top_k,
        )

    @component.output_types(documents=List[Document])
    def run(self, text: str) -> Dict[str, Any]:
        """
        Embed the query and retrieve matching documents from the store.

        :param text: Natural-language search query to look up in the knowledge base.
        :returns: Dict with a single key ``documents`` holding the retrieved documents.
        """
        emb_out = self.text_embedder.run(text=text)
        docs_out = self.retriever.run(query_embedding=emb_out["embedding"])
        return {"documents": docs_out["documents"]}
# Expose the private-store searcher to the agent as a callable tool.
rag_tool = ComponentTool(
    component=RagSearcher(document_store),
    name="rag_search",
    description="Semantic search over the Islamabad knowledge base.",
)

# This import was fused onto the closing parenthesis above (a syntax error);
# it belongs on its own line.
import requests
from haystack import component
from haystack.dataclasses import Document
from typing import List
@component
class TavilyWebSearch:
    """Web search component that queries the Tavily HTTP search endpoint."""

    def __init__(self, api_key: str, top_k: int = 3):
        """
        Store the request settings.

        :param api_key: Tavily API key sent in the body of each request.
        :param top_k: Value sent as ``max_results`` in each request.
        """
        self.api_key = api_key
        self.top_k = top_k

    @component.output_types(documents=List[Document])
    def run(self, query: str):
        """
        Search the web through Tavily and wrap the response as documents.

        :param query: Search query to send to Tavily.
        :returns: Dict with a single key ``documents``: the direct answer first
            (when the response contains one), followed by one document per
            search result that has content.
        :raises requests.HTTPError: If Tavily answers with an error status.
        """
        resp = requests.post(
            "https://api.tavily.com/search",
            json={
                "api_key": self.api_key,
                "query": query,
                "max_results": self.top_k,
                "include_answer": True,
            },
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json()

        docs: List[Document] = []
        if answer := data.get("answer"):
            docs.append(Document(content=answer, meta={"source": "tavily:direct_answer"}))

        # `or []` also covers an explicit null "results" field.
        for hit in data.get("results") or []:
            content = hit.get("content")
            if not content:
                # A result without text is useless to the agent; skip it rather
                # than failing the whole search with a KeyError.
                continue
            docs.append(
                Document(
                    content=content,
                    meta={
                        "title": hit.get("title"),
                        "url": hit.get("url"),
                    },
                )
            )
        return {"documents": docs}
import os
# Wrap the Tavily component as an agent tool. The API key is read from the
# environment; a missing TAVILY_API_KEY raises KeyError right here.
web_tool = ComponentTool(
    name="web_search",
    description="Live web search via Tavily .",
    component=TavilyWebSearch(
        top_k=3,
        api_key=os.environ["TAVILY_API_KEY"],
    ),
)
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.agents import Agent
# Instructions steering tool choice: knowledge base first, web search only
# for fresh or external information.
system_prompt = """
You are a helpful assistant.
- Use rag_search first to retrieve information from the knowledge base.
- Use web_search only when the query requires fresh, real-time, or external information (e.g., weather, breaking news).
"""

# Chat model that drives the agent.
generator = OpenAIChatGenerator(model="gpt-4.1-mini")

# Agent wired with both tools defined earlier in the file.
agent = Agent(
    tools=[rag_tool, web_tool],
    system_prompt=system_prompt,
    chat_generator=generator,
)
from haystack.dataclasses import ChatMessage
# First query: a question the seeded knowledge base can answer.
msg = ChatMessage.from_user(text="What is the peak tourist season in Islamabad?")
resp = agent.run(messages=[msg])

# The last message in the run output carries the final reply text.
print(resp["messages"][-1].text)
def tools_used(run_output: dict) -> list[str]:
    """Return the names of the tools called during an agent run.

    Each name appears once, in the order it was first called.

    :param run_output: Agent run result; must contain a ``"messages"`` list
        whose items may expose a ``tool_calls`` sequence of objects with a
        ``tool_name`` attribute.
    :returns: De-duplicated tool names in first-call order.
    :raises KeyError: If ``run_output`` has no ``"messages"`` key.
    """
    called = (
        call.tool_name
        for message in run_output["messages"]
        # Messages with no (or a None) `tool_calls` contribute nothing.
        for call in (getattr(message, "tool_calls", None) or ())
    )
    # dict keys keep insertion order, giving an ordered de-duplication.
    return list(dict.fromkeys(called))
# Report which tools the previous run invoked.
print("Tools invoked →", tools_used(resp))

# Second query: a real-time question, which the system prompt routes to
# web_search. (This statement was fused onto the print above — a syntax error.)
msg = ChatMessage.from_user("What is the temperature in Islamabad now?")
resp = agent.run(messages=[msg])
print(resp["messages"][-1].text)
print("Tools invoked →", tools_used(resp))