Skip to content
Introduction
Here I'm going to practice some queries and let's see what we can find about this dataset.
import pandas as pd
import matplotlib.pyplot as plt
Revenue per State
Unknown integration
DataFrameavailable as
df
variable
SELECT sum(a.list_price) as Revenue, c.store_name as Store, c.state
FROM sales.order_items AS a
INNER JOIN sales.orders AS b
ON a.order_id = b.order_id
INNER JOIN sales.stores AS c
ON b.store_id = c.store_id
GROUP BY c.store_name,c.state
ORDER BY Revenue DESC
import plotly.express as px
fig = px.pie(df, values='Revenue', names='state')
fig.update_traces(textposition='inside')
fig.update_layout(uniformtext_minsize=14, uniformtext_mode='hide')
fig.show()
Revenue for each Year
Unknown integration
DataFrameavailable as
df
variable
SELECT
YEAR(b.required_date) AS Sales_year,
SUM(a.list_price) as Revenue
FROM
sales.orders AS b
INNER JOIN
sales.order_items AS a
ON
b.order_id = a.order_id
GROUP BY
YEAR(b.required_date)
ORDER BY
YEAR(b.required_date)
fig = px.bar(df, x="Sales_year", y="Revenue", barmode='group')
fig.update_xaxes(type='category')
fig.show()
Revenue for each Store in each Year
Unknown integration
DataFrameavailable as
df
variable
SELECT
YEAR(b.required_date) AS Sales_year,
SUM(a.list_price) as Revenue,
c.state as State
FROM
sales.orders AS b
INNER JOIN
sales.order_items AS a
ON
b.order_id = a.order_id
INNER JOIN
sales.stores AS c
ON
b.store_id = c.store_id
GROUP BY
YEAR(b.required_date), c.state
ORDER BY
YEAR(b.required_date)
import plotly.express as px
fig = px.bar(df, x="Sales_year", y="Revenue", color='State',barmode='group')
fig.show()
Best Seller Models
Unknown integration
DataFrameavailable as
df
variable
SELECT sum(a.quantity) as Quantity, b.product_name as Model
FROM sales.order_items AS a
INNER JOIN production.products AS b
ON a.product_id = b.product_id
GROUP BY b.product_name
ORDER BY Quantity DESC
# Get top 10
df = df.head(10)
# Sort dataframe
df.sort_values(by=['Quantity'], inplace=True, ascending=True)
# Create chart
ax = df.plot.barh(x='Model', y='Quantity', color='limegreen',figsize=(8,4))
# Define Title
plt.title('Best Seller Model',fontsize = 12,fontweight='bold')
# Define axis names
plt.xlabel('Quantity',fontsize=10,fontweight='bold')
plt.ylabel('Model',fontsize=10,fontweight='bold')
# Define axis names
plt.xlabel('Quantity')
plt.ylabel('Model')
# Set labels for each bar
y = list(df['Quantity'])
for i, v in enumerate(y):
ax.text(v + 10, i-0.1, str(v), color='black',fontsize = 9,fontweight='bold')
for spine in plt.gca().spines.values():
spine.set_visible(False)
# Drop the legend
ax.get_legend().remove()
Best Seller Brands