Skip to content

Mechanics:

  1. Scrape all Pokémon data (number, name, type, stats) from the Pokémon Database (https://pokemondb.net/pokedex/all).
  2. Store the results in a SQL database (the script below uses SQLite).
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sqlalchemy import create_engine

# Scrape data from the Pokemon database website
url = "https://pokemondb.net/pokedex/all"
# A timeout keeps the script from blocking indefinitely on a stalled
# connection; raise_for_status() makes an HTTP error (4xx/5xx) fail here
# instead of letting an error page be parsed as if it were the Pokedex.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', {'id': 'pokedex'})
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise RuntimeError(f"No table with id 'pokedex' found at {url}")
rows = table.find_all('tr')

# Extract table headers (taken from the <th> cells of the first row)
header = [th.get_text(' ', strip=True) for th in rows[0].find_all('th')]

# Extract table data
data = []
for row in rows[1:]:
    cells = row.find_all('td')
    if not cells:
        # A row without any <td> carries no record; skip it instead of
        # appending an empty list.
        continue
    # Keep every cell, including empty ones: dropping blanks would shift the
    # remaining values left and misalign them with the header.
    # get_text(' ', strip=True) joins the text fragments of a cell with a
    # single space, so multi-part cells stay space-separated regardless of
    # the whitespace present in the HTML.
    cols = [cell.get_text(' ', strip=True) for cell in cells]
    data.append(cols)

# Create a DataFrame from the scraped data
pokemon_df = pd.DataFrame(data, columns=header)

# Column summary before conversion
pokemon_df.info()

# Change to appropriate data type.
# This is done BEFORE writing to the database so that the stored table is
# built from the converted frame rather than from the raw scraped strings.
pokemon_df = pokemon_df.astype({
    '#': 'int',
    'Name': 'string',
    'Type': 'string',
    'Total': 'int',
    'HP': 'int',
    'Attack': 'int',
    'Defense': 'int',
    'Sp. Atk': 'int',
    'Sp. Def': 'int',
    'Speed': 'int',
})

# Column summary after conversion
pokemon_df.info()

# Create SQLite database and store data ('replace' drops any existing
# 'pokemon' table first, so re-running the script does not duplicate rows)
engine = create_engine('sqlite:///pokemon.db')
pokemon_df.to_sql('pokemon', con=engine, if_exists='replace', index=False)

# Indicate that the data has been scraped and stored
print("Data scraped and stored in 'pokemon.db'")

# Read the table back from the database to confirm what was written
query = "SELECT * FROM pokemon"
df_display = pd.read_sql(query, con=engine)

# Display the result
print(df_display)
import matplotlib.pyplot as plt

# Break every 'Type' entry apart on single spaces, give each resulting piece
# its own row, and trim whitespace left around the pieces
types = pokemon_df['Type'].str.split(' ')
types = types.explode()
types = types.str.strip()

# Tally how many times each individual type occurs
type_counts = types.value_counts()

# Draw the tallies as a bar chart on a 12x6 figure
plt.figure(figsize=(12, 6))
axes = type_counts.plot.bar()
axes.set_title('Pokémon Types')
axes.set_xlabel('Type')
axes.set_ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()