Skip to content
pet_supplies
DataFrameas
df
variable
SELECT
public.pet_supplies.product_id,
(CASE WHEN public.pet_supplies.category = '-' OR category IS NULl THEN 'Unknown'
ELSE public.pet_supplies.category END) as category,
public.pet_supplies.animal,
CASE
WHEN initcap(public.pet_supplies.size) IS NOT NULL THEN initcap(public.pet_supplies.size)
ELSE 'Unknown'
END AS size,
ROUND(COALESCE(NULLIF((case when public.pet_supplies.price ~ '^[0-9]+(\.[0-9]{1,2})?$' then public.pet_supplies.price::numeric end), 0), 0), 2) AS price,
COALESCE(public.pet_supplies.sales,
(SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY sales)
FROM public.pet_supplies
WHERE sales IS NOT NULL)) AS sales,
COALESCE(public.pet_supplies.rating, 0) AS rating,
public.pet_supplies.repeat_purchase
FROM
public.pet_supplies
WHERE
public.pet_supplies.product_id IS NOT NULL
AND public.pet_supplies.repeat_purchase IS NOT NULL;
DataFrameas
df1
variable
-- Explore the data in the table
SELECT public.pet_supplies.animal,
public.pet_supplies.repeat_purchase,
ROUND(avg(public.pet_supplies.sales)) as avg_sales,
ROUND(min(public.pet_supplies.sales)) as min_sales,
ROUND(MAX(public.pet_supplies.sales)) as max_sales
FROM public.pet_supplies
group by public.pet_supplies.animal, public.pet_supplies.repeat_purchaseDataFrameas
df2
variable
SELECT
public.pet_supplies.product_id,
public.pet_supplies.sales,
public.pet_supplies.rating
FROM public.pet_supplies
where animal in ('Cat','Dog') and public.pet_supplies.repeat_purchase = 1