Skip to content
New Workbook
Sign up
[practicing] Bicycle Sales Database

Introduction

Here I'm going to practice some queries and let's see what we can find about this dataset.

import pandas as pd
import matplotlib.pyplot as plt

Revenue per State

Spinner
DataFrameavailable as
df
variable
SELECT sum(a.list_price) as Revenue, c.store_name as Store, c.state
FROM sales.order_items AS a
INNER JOIN sales.orders AS b
	ON a.order_id = b.order_id
INNER JOIN sales.stores AS c
	ON b.store_id = c.store_id
    
GROUP BY c.store_name,c.state
ORDER BY Revenue DESC
import plotly.express as px

fig = px.pie(df, values='Revenue', names='state')
fig.update_traces(textposition='inside')
fig.update_layout(uniformtext_minsize=14, uniformtext_mode='hide')
fig.show()

Revenue for each Year

Spinner
DataFrameavailable as
df
variable
SELECT
    YEAR(b.required_date) AS Sales_year,
    SUM(a.list_price) as Revenue
FROM
	sales.orders AS b
INNER JOIN
	sales.order_items AS a
ON
	b.order_id = a.order_id
GROUP BY
	YEAR(b.required_date)
ORDER BY
    YEAR(b.required_date)
fig = px.bar(df, x="Sales_year", y="Revenue", barmode='group')
fig.update_xaxes(type='category')
fig.show()

Revenue for each Store in each Year

Spinner
DataFrameavailable as
df
variable
SELECT
    YEAR(b.required_date) AS Sales_year,
    SUM(a.list_price) as Revenue,
    c.state as State
FROM
	sales.orders AS b
INNER JOIN
	sales.order_items AS a
ON
	b.order_id = a.order_id
INNER JOIN
    sales.stores AS c
ON
    b.store_id = c.store_id
GROUP BY
	YEAR(b.required_date), c.state
ORDER BY
    YEAR(b.required_date)
import plotly.express as px

fig = px.bar(df, x="Sales_year", y="Revenue", color='State',barmode='group')
fig.show()

Best Seller Models

Spinner
DataFrameavailable as
df
variable
SELECT sum(a.quantity) as Quantity, b.product_name as Model
FROM sales.order_items AS a
INNER JOIN production.products AS b
	ON a.product_id = b.product_id
    
GROUP BY b.product_name
ORDER BY Quantity DESC
# Get top 10 
df = df.head(10)

# Sort dataframe
df.sort_values(by=['Quantity'], inplace=True, ascending=True)

# Create chart
ax = df.plot.barh(x='Model', y='Quantity', color='limegreen',figsize=(8,4))

# Define Title
plt.title('Best Seller Model',fontsize = 12,fontweight='bold')

# Define axis names
plt.xlabel('Quantity',fontsize=10,fontweight='bold')
plt.ylabel('Model',fontsize=10,fontweight='bold')

# Define axis names
plt.xlabel('Quantity')
plt.ylabel('Model')

# Set labels for each bar
y = list(df['Quantity'])
for i, v in enumerate(y):
    ax.text(v + 10, i-0.1, str(v), color='black',fontsize = 9,fontweight='bold')
    
for spine in plt.gca().spines.values():
    spine.set_visible(False)
    
# Drop the legend
ax.get_legend().remove()

Best Seller Brands