Skip to content
Course Notes: Exploratory Data Analysis in SQL
add text here
Must run the below SQL for all queries to run
import pandas as pd
ev_311 = pd.read_csv('datasets/ev311.csv')
ev_311Hidden output
Hidden code
dfHidden code
df1Hidden code
df2Cases and Spaces
upper and lower functions are used to make all upper cases or lower cases
Hidden code
df3DataFrameas
df4
variable
--Count rows
SELECT count(*)
FROM ev_311
--Where description includes trash or garbage
WHERE description ILIKE '%trash%'
OR description ILIKE '%garbage%';DataFrameas
df5
variable
-- Select categories containing Trash or Garbage
SELECT category
FROM ev_311
-- Use LIKE
WHERE category LIKE '%Trash%'
OR category LIKE '%Garbage%';DataFrameas
df6
variable
--Count rows
SELECT count(*)
FROM ev_311
-- description contains trash or garbage (any case)
WHERE (description ILIKE '%trash%'
OR description ILIKE '%garbage%')
-- category does not contain Trash or Garbage
AND category NOT LIKE '%Trash%'
AND category NOT LIKE '%Garbage%';DataFrameas
df7
variable
-- Count rows with each category
SELECT category, count(*)
FROM ev_311
WHERE (description ILIKE '%trash%'
OR description ILIKE '%garbage%')
AND category NOT LIKE '%Trash%'
AND category NOT LIKE '%Garbage%'
-- What are you counting?
GROUP BY category
ORDER BY DESC
LIMIT 10;DataFrameas
df8
variable
--Concat and then trim the concat from the start
SELECT distinct street,
trim(street, '0123456789 #/.') AS cleaned_street
FROM ev_311
ORDER BY street;
DataFrameas
df9
variable
-- Select the first word of the street value
SELECT split_part(street,' ', 1) AS street_name,
count(*)
FROM ev_311
GROUP BY street_name
ORDER BY street_name DESC
LIMIT 20;DataFrameas
df10
variable
-- Select the first 50 chars when length is greater than 50
SELECT CASE WHEN length(description) > 50
THEN left(description, 50) || '...'
-- otherwise just select description
ELSE description
END
FROM ev_311
-- limit to descriptions that start with the word I
WHERE description LIKE 'I %'
ORDER BY description;