Skip to content

Spinner
DataFrameas
df
variable
SELECT
    public.pet_supplies.product_id,
    (CASE WHEN public.pet_supplies.category = '-' OR category IS NULl THEN 'Unknown'
ELSE public.pet_supplies.category END) as category,
    public.pet_supplies.animal,
    CASE 
    WHEN initcap(public.pet_supplies.size) IS NOT NULL THEN initcap(public.pet_supplies.size)
    ELSE 'Unknown' 
	END AS size,
    ROUND(COALESCE(NULLIF((case when public.pet_supplies.price ~ '^[0-9]+(\.[0-9]{1,2})?$' then public.pet_supplies.price::numeric end), 0), 0), 2) AS price,
    COALESCE(public.pet_supplies.sales, 
             (SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY sales) 
              FROM public.pet_supplies 
              WHERE sales IS NOT NULL)) AS sales,
    COALESCE(public.pet_supplies.rating, 0) AS rating,
    public.pet_supplies.repeat_purchase
FROM 
    public.pet_supplies
WHERE 
    public.pet_supplies.product_id IS NOT NULL 
    AND public.pet_supplies.repeat_purchase IS NOT NULL;
Spinner
DataFrameas
df1
variable
-- Explore the data in the table
SELECT public.pet_supplies.animal,
	public.pet_supplies.repeat_purchase,
	ROUND(avg(public.pet_supplies.sales)) as avg_sales,
	ROUND(min(public.pet_supplies.sales)) as min_sales,
	ROUND(MAX(public.pet_supplies.sales)) as max_sales
FROM public.pet_supplies
group by public.pet_supplies.animal, public.pet_supplies.repeat_purchase
Spinner
DataFrameas
df2
variable
SELECT 
	public.pet_supplies.product_id,
	public.pet_supplies.sales,
	public.pet_supplies.rating
FROM public.pet_supplies
where animal in ('Cat','Dog') and public.pet_supplies.repeat_purchase = 1