Skip to content
Spinner
DataFrameas
df1
variable
SELECT * FROM 'pet_supplies_2212.csv'
Spinner
DataFrameas
df5
variable
SELECT product_id,
		COALESCE(
				CASE WHEN category = '-' THEN NULL
				ELSE category
				END, 'Unknown') AS category,
		animal,
		CONCAT(UPPER(LEFT(size,1)),LOWER(RIGHT(size, LENGTH(size)-1))) AS size,
		price,
		sales,
		COALESCE(
				CASE WHEN rating = 'NA' THEN NULL
				ELSE rating:: INTEGER
				END, 0) as rating,
		repeat_purchase
FROM df1;
			
	
Spinner
DataFrameas
df
variable
WITH p1 AS(
		SELECT product_id, category, animal, size, CASE WHEN price = 'unlisted' THEN NULL
			ELSE CAST(price AS numeric)
			END as price_num, sales, rating, repeat_purchase
		FROM df5),
	p2 AS(
		SELECT ROUND(MEDIAN(price_num), 2) AS med_price
		FROM p1)

SELECT product_id, category, animal, size, COALESCE(price_num, med_price) as price, sales, rating, repeat_purchase 
FROM p1,p2;
Spinner
DataFrameas
df2
variable
SELECT category,COUNT(repeat_purchase) AS count
FROM df
WHERE repeat_purchase = 1
GROUP BY category
ORDER BY count DESC;
Spinner
DataFrameas
df3
variable
SELECT sales
FROM df;
Spinner
DataFrameas
df4
variable
SELECT sales, NTILE(4) OVER (ORDER BY sales)
FROM df;
df3.boxplot()
Spinner
DataFrameas
df7
variable
SELECT repeat_purchase, sales
FROM df;
df7.boxplot(by = 'repeat_purchase')