Hugging Face AI Agents GAIA Challenge - Submitted Code

Successfully completed the Hugging Face AI Agents GAIA Challenge, where I built a fully autonomous agent using the Smolagents framework, capable of answering twenty level 1 questions from the GAIA benchmark using reasoning, file analysis, and external tools. This challenge was the final project in the comprehensive Hugging Face AI Agents course and required at least 30% of questions to be answered correctly and in the proper format in order to earn a certificate of completion.
import os
import base64
import re
import gradio as gr
import pandas as pd
import requests
from dotenv import load_dotenv
from huggingface_hub import login
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from openai import OpenAI
from smolagents import CodeAgent, HfApiModel
from smolagents.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi

# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

load_dotenv()

login(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

OPENAI_KEY = os.getenv("OPENAI_KEY")
client = OpenAI(api_key=OPENAI_KEY)


@tool
def analyze_image(image_path: str, query: str) -> str:
    """
    Analyzes the content of an image using the OpenAI Vision model.
    
    Args:
        image_path: Path to the image file.
        query: What needs to be analyzed in the image.
    """
    with open(image_path, "rb") as f:
        image_data = f.read()
    base64_image = base64.b64encode(image_data).decode("utf-8")

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": query},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
            ]}
        ],
        max_tokens=500
    )
    return response.choices[0].message.content

@tool
def analyze_audio(audio_path: str, query: str) -> str:
    """
    Transcribes and analyzes the contents of an audio file.
    
    Args:
        audio_path: Path to an audio file (e.g., MP3 or WAV).
        query: What needs to be analyzed in the transcript.
    """
    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text"
        )

    prompt = f"""Analyze the following audio transcription and answer the following query.
    Query:
    {query}
    Transcript:
    {transcript}
    """

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=300
    )
    return response.choices[0].message.content

@tool
def analyze_excel(file_path: str, query: str) -> str:
    """
    Analyzes the content of an Excel file (xlsx files) and provides insights.
    
    Args:
        file_path: Path to the Excel file.
        query: What needs to be analyzed in the excel file.
    """
    df = pd.read_excel(file_path)
    table_str = df.head(10).to_markdown(index=False)

    prompt = f"""Analyze this dataset from an Excel file and answer the following query.
    Query:
    {query}
    Dataset:
    {table_str}
    """
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500
    )
    return response.choices[0].message.content

@tool
def analyze_python_code(file_path: str, query: str) -> str:
    """
    Analyzes a Python file and provides answers to questions about it.
    Args:
        file_path: Path to the Python (.py) file.
        query: Specific question or analysis request about the code.
    """
    try:
        with open(file_path, "r") as file:
            code = file.read()
    except Exception as e:
        return f"Failed to read file: {e}"

    prompt = f"""Analyze the following Python code and answer the following query.
    Query:
    {query}
    Code:
    ```python
    {code}
    ```"""

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

@tool
def analyze_youtube_transcript(video_id: str, query: str) -> str:
    """
    Fetches and analyzes the transcript of a YouTube video using OpenAI.
    
    Args:
        video_id: YouTube video ID (e.g., 'dQw4w9WgXcQ')
        query: What needs to be analyzed in the YouTube video transcript.
    """
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join([entry['text'] for entry in transcript_list][:100])

        prompt = f"""Analyze the following transcript from a YouTube video and answer the following query.
        Query:
        {query}
        Transcript:
        {full_transcript}
        """

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error fetching transcript: {e}"



@tool
def analyze_text(text: str, query: str) -> str:
    """Analyzes a block of text (e.g. web search results, transcripts, outputs from other tools, etc.) based on a given query.
    Args:
        text: the block of text to be analyzed.
        query: outlines how to analyze the text."""
    
    prompt = f"""
    Respond to the query: {query} using the following block of text:
    The text:
    {text}
    """
    completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": prompt
        }
    ]
    )
    return completion.choices[0].message.content


@tool
def web_search(query: str) -> str:
    """Performs a web search and returns top results.
    Args:
        query: the query to be searched up.
    """

    results = TavilySearchResults(max_results=3).invoke({"query": query})
    return results

@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results. This text result can then be passed into the understand_text tool to 
    
    Args:
        query: The search query."""
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return {"wiki_results": formatted_search_docs}

@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 result.
    
    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return {"arvix_results": formatted_search_docs}

@tool
def llm_answer_if_possible(query: str) -> str:
    """
    Attempts to answer a question using an LLM (GPT-3.5 Turbo). 
    If the question can be answered using the model alone (no need for web search, external documents, or tools), 
    it outputs only the answer in the format specified by the question — no extra text. 
    If the question requires external information, returns an empty string.
    
    Args:
        query: The input question or prompt.
    """
    system_prompt = (
        "You are a careful assistant. If the question can be answered using only your internal knowledge "
        "(i.e. no web search, external files, or real-time information needed), then answer it strictly in the format requested. "
        "If it cannot be answered with certainty or requires external tools, respond with nothing at all."
    )
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query}
        ],
        temperature=0
    )
    
    return response.choices[0].message.content.strip()




# --- Basic Agent Definition ---
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    def __init__(self):
        self.agent = CodeAgent(
            tools=[llm_answer_if_possible, analyze_text, wiki_search, arvix_search, web_search, analyze_image, analyze_audio, analyze_excel, analyze_python_code, analyze_youtube_transcript],
            model=HfApiModel(model="openchat/openchat-3.5-1210")
        )
        
    def __call__(self, question: str, task_id: str = None) -> str:
        file_path_info = ""
        if task_id:
            try:
                file_url = f"{DEFAULT_API_URL}/files/{task_id}"
                file_response = requests.get(file_url)
                if file_response.status_code == 200:
                    # Try to extract filename from headers, fallback to task_id
                    content_disposition = file_response.headers.get("content-disposition", "")
                    match = re.search(r'filename="(.+?)"', content_disposition)
                    filename = match.group(1) if match else f"{task_id}_file"
    
                    with open(filename, "wb") as f:
                        f.write(file_response.content)
    
                    print(f"Downloaded file for task {task_id} as {filename}")
                    file_path_info = f"\nThis question is accompanied by the file `{filename}` which can be analyzed with the appropriate tool.\n"
                else:
                    print(f"No file found for task {task_id} (status: {file_response.status_code})")
            except Exception as e:
                print(f"Error downloading file for task {task_id}: {e}")

        agent_prompt = f"""
        You are an AI assistant that must always begin by using a tool, specifically `llm_answer_if_possible()`. 
        You are not allowed to answer the question directly without using this tool first.
    
            TOOL USAGE RULES:
        1. The first tool you must call is `llm_answer_if_possible()`. 
           Pass into it exactly whatever follows the `QUESTION:` label below — no modifications.
        2. If `llm_answer_if_possible()` returns an answer, immediately pass that answer into the `final_answer()` tool and do not proceed further.
        3. If it returns an empty string, continue with other tools to answer the question.
        4. Always pass rich data (web, wiki, arxiv, transcripts) to `analyze_text()` if you need deeper understanding.
        5. When using `analyze_text`, phrase the `query` to closely mirror the user’s original question.
        6. Only call `web_search` and `wiki_search` once each. If you need more, analyze their outputs instead.
        
        DO NOT begin reasoning or planning until you have called `llm_answer_if_possible()` with the full unedited QUESTION text.
        {file_path_info}
        
        QUESTION:
        {question}
        """
        try:
            result = self.agent.run(agent_prompt)
            return result if isinstance(result, str) else str(result)
        except Exception as e:
            return f"Error: {e}"

def run_and_submit_all( profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

    if profile:
        username= f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
             print("Fetched questions list is empty.")
             return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
         print(f"Error decoding JSON response from questions endpoint: {e}")
         print(f"Response text: {response.text[:500]}")
         return f"Error decoding server response for questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, task_id=task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission 
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df


# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
Hugging Face AI Agents GAIA Challenge - Submitted Code

.mfe-app-workspace-kj242g{position:absolute;top:-8px;}.mfe-app-workspace-11ezf91{display:inline-block;}.mfe-app-workspace-11ezf91:hover .Anchor__copyLink{visibility:visible;}Hugging Face AI Agents GAIA Challenge - Submitted Code

Hugging Face AI Agents GAIA Challenge - Submitted Code