Skip to content
sql-e commerce analysis
Unknown table
DataFrameas
df
variable
-- How many orders are there overall?
SELECT COUNT(*)
FROM shipping_ecommerce.csv;DataFrameas
df2
variable
-- Which shipment modes are used the most?
SELECT Mode_of_Shipment, COUNT (*)
FROM shipping_ecommerce.csv
GROUP BY Mode_of_Shipment
ORDER BY Mode_of_Shipment DESC;
DataFrameas
df3
variable
-- What’s the average discount offered per shipment mode?
SELECT Mode_of_Shipment, ROUND(AVG(Discount_offered), 2) AS avg_discount
FROM shipping_ecommerce.csv
GROUP BY Mode_of_Shipment;DataFrameas
df4
variable
-- Which warehouse blocks ship the most items?
SELECT Warehouse_block, count(*) AS orders_shipped
FROM shipping_ecommerce.csv
GROUP BY Warehouse_block
ORDER BY orders_shipped DESC
LIMIT 1;DataFrameas
df5
variable
-- Is there any pattern between customer care calls and product return (Class = 1)?
SELECT Customer_care_calls, count(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
SUM(CASE WHEN Class = 0 THEN 1 ELSE 0 END) AS no_return,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY Customer_care_calls
ORDER BY Customer_care_calls;DataFrameas
df6
variable
-- Which customer ratings are associated with returns vs. no returns?
SELECT Customer_rating,
count(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY Customer_rating
ORDER BY Customer_rating;DataFrameas
df7
variable
-- Do different genders show different return behavior?
SELECT Gender,
count(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY Gender
ORDER BY Gender;DataFrameas
df8
variable
-- Is there a connection between prior purchases and returns?
SELECT Prior_purchases,
count(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY Prior_purchases
ORDER BY Prior_purchases;DataFrameas
df9
variable
SELECT
CASE
WHEN Weight_in_gms < 1000 THEN '< 1kg'
WHEN Weight_in_gms BETWEEN 1000 AND 1999 THEN '1kg - 2kg'
WHEN Weight_in_gms BETWEEN 2000 AND 2999 THEN '2kg - 3kg'
WHEN Weight_in_gms BETWEEN 3000 AND 3999 THEN '3kg - 4kg'
WHEN Weight_in_gms BETWEEN 4000 AND 4999 THEN '4kg - 5kg'
WHEN Weight_in_gms BETWEEN 5000 AND 5999 THEN '5kg - 6kg'
WHEN Weight_in_gms BETWEEN 6000 AND 6999 THEN '6kg - 7kg'
ELSE '7kg - 8kg'
END AS weight,
COUNT(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY weight
ORDER BY weight;DataFrameas
df10
variable
-- Which product importance level (low, medium, high) gets returned the most?
SELECT Product_importance,
count(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY Product_importance
ORDER BY return_rate DESC;DataFrameas
df11
variable
-- Which combination of Warehouse Block and Product Importance has the highest return rate?
-- (use cube)
SELECT
Warehouse_block,
Product_importance,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END)/COUNT(*), 2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY CUBE (Warehouse_block, Product_importance);
DataFrameas
df12
variable
-- What is the average weight of returned vs non-returned products, and does product weight significantly affect returns?
SELECT
Class,
COUNT(*) AS total_orders,
ROUND(AVG(Weight_in_gms),2) AS avg_weight
FROM shipping_ecommerce.csv
GROUP BY Class;
DataFrameas
df13
variable
-- Are higher discounts associated with higher return rates?
SELECT
CASE
WHEN Discount_offered BETWEEN 1 AND 10 THEN 'low (1–10%)'
WHEN Discount_offered BETWEEN 11 AND 15 THEN 'mid (11–15%)'
WHEN Discount_offered BETWEEN 16 AND 30 THEN 'high (16–30%)'
ELSE 'extreme (31%+)'
END AS discount_band,
COUNT(*) AS orders,
SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS returns,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) / COUNT(*),2) AS return_rate
FROM shipping_ecommerce.csv
GROUP BY discount_band
ORDER BY
CASE
WHEN discount_band = 'low (1–10%)' THEN 1
WHEN discount_band = 'mid (11–15%)' THEN 2
WHEN discount_band = 'high (16–30%)' THEN 3
ELSE 4
END;
-- a basic query showing just return rate w individual discounts ordered by discounts showed that till 10% discount the return rate varied and post 10% discount it was always 100%, this could be either due to a problem in the dataset or because only defected products are shipped out on a high discount.DataFrameas
df14
variable
-- Do repeat customers (with prior purchases > 0) return products more or less than first-time buyers?
SELECT
CASE
WHEN Prior_purchases >= 1 THEN 'return customer'
ELSE 'first time customer'
END AS customer_type,
ROUND(100.0 * SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) AS return_rate,
COUNT(*) AS orders
FROM shipping_ecommerce.csv
GROUP BY customer_type;
-- no first time customers