Los Angeles, California 😎. The City of Angels. Tinseltown. The Entertainment Capital of the World!
Known for its warm weather, palm trees, sprawling coastline, and Hollywood, along with producing some of the most iconic films and songs. However, as with any highly populated city, it isn't always glamorous and there can be a large volume of crime. That's where you can help!
You have been asked to support the Los Angeles Police Department (LAPD) by analyzing crime data to identify patterns in criminal behavior. They plan to use your insights to allocate resources effectively to tackle various crimes in different areas.
The Data
They have provided you with a single dataset to use. A summary and preview are provided below.
It is a modified version of the original data, which is publicly available from Los Angeles Open Data.
crimes.csv
| Column | Description |
|---|---|
| 'DR_NO' | Division of Records Number: Official file number made up of a 2-digit year, area ID, and 5 digits. |
| 'Date Rptd' | Date reported - MM/DD/YYYY. |
| 'DATE OCC' | Date of occurrence - MM/DD/YYYY. |
| 'TIME OCC' | In 24-hour military time. |
| 'AREA NAME' | The 21 Geographic Areas or Patrol Divisions are also given a name designation that references a landmark or the surrounding community that it is responsible for. For example, the 77th Street Division is located at the intersection of South Broadway and 77th Street, serving neighborhoods in South Los Angeles. |
| 'Crm Cd Desc' | Indicates the crime committed. |
| 'Vict Age' | Victim's age in years. |
| 'Vict Sex' | Victim's sex: F: Female, M: Male, X: Unknown. |
| 'Vict Descent' | Victim's descent. |
| 'Weapon Desc' | Description of the weapon used (if applicable). |
| 'Status Desc' | Crime status. |
| 'LOCATION' | Street address of the crime. |
# Re-run this cell
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset. 'TIME OCC' is read as text so that it can be sliced as a
# four-character "HHMM" string further down.
crimes = pd.read_csv("crimes.csv", dtype={"TIME OCC": str})
print(crimes.head())
print(crimes.dtypes)
print(crimes.isna().sum())

# Which hour has the highest frequency of crimes?
# Most frequent exact "HHMM" value; kept because later cells may still read it.
peak_crime_hour_org = crimes['TIME OCC'].value_counts().sort_values(ascending=False).index[0]
# Count per hour (first two characters) instead of per exact timestamp: the
# single most common timestamp does not have to lie inside the busiest hour.
peak_crime_hour = int(crimes['TIME OCC'].str[:2].astype('int64').value_counts().idxmax())
print(peak_crime_hour)
# Checking number of crimes x hour
# Split the "HHMM" text into integer hour and minute columns. The vectorized
# string accessor replaces a per-row Python lambda.
crimes['HOUR'] = crimes['TIME OCC'].str[:2].astype('int64')
crimes['MINUTES'] = crimes['TIME OCC'].str[2:].astype('int64')

# One bar per hour of the day, height = number of records in that hour.
sns.countplot(data=crimes, x='HOUR')
plt.show()
# Area with most frequent night crimes.
# A record counts as a night crime when its hour is 22 or later, or before 4.
crimes['NIGHT_CRIME'] = crimes['HOUR'].ge(22) | crimes['HOUR'].lt(4)

# Total night crimes per area, largest first, as a two-column DataFrame.
night_totals = crimes.groupby('AREA NAME')['NIGHT_CRIME'].sum()
night_crimes_by_area = night_totals.sort_values(ascending=False).reset_index()

sns.barplot(data=night_crimes_by_area, x='AREA NAME', y='NIGHT_CRIME')
plt.xticks(rotation=45)
plt.show()

# The table is sorted in descending order, so the first row is the peak area.
peak_night_crime_location = night_crimes_by_area['AREA NAME'].iloc[0]
print(peak_night_crime_location)
# Number of crimes committed against victims of different age groups
# Bucket victim ages into labelled groups.
# Bins are right-closed: (0, 17], (17, 25], (25, 34], (34, 44], (44, 54],
# (54, 64], (64, inf). NOTE: ages <= 0 fall outside every bin and become NaN.
crimes['VICTIM_AGE_GROUP'] = pd.cut(
    x=crimes['Vict Age'],
    bins=[0, 17, 25, 34, 44, 54, 64, float('inf')],
    labels=["0-17", "18-25", "26-34", "35-44", "45-54", "55-64", "65+"],
    right=True,
)

# Count records per age group. The grouping key is categorical, so `observed`
# is passed explicitly: this keeps every label in the result (even empty ones)
# and avoids the pandas FutureWarning about the changing default.
victim_ages = crimes.groupby('VICTIM_AGE_GROUP', observed=False)['DR_NO'].count()
victim_ages_ = victim_ages.reset_index().rename(columns={'DR_NO': 'QUANTITY'})
sns.barplot(data=victim_ages_, x='VICTIM_AGE_GROUP', y='QUANTITY')
plt.show()
# Normalizing crime description
# Broad crime categories and the raw 'Crm Cd Desc' values assigned to each.
_CRIME_CATEGORIES = {
    'Theft & Fraud': (
        'THEFT OF IDENTITY',
        'THEFT PLAIN - PETTY ($950 & UNDER)',
        'THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)',
        'DOCUMENT FORGERY / STOLEN FELONY',
        'BUNCO, GRAND THEFT',
        'EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)',
        'SHOPLIFTING - PETTY THEFT ($950 & UNDER)',
        'CREDIT CARDS, FRAUD USE ($950 & UNDER)',
        'PICKPOCKET',
        'EXTORTION',
        'UNAUTHORIZED COMPUTER ACCESS',
        'THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)',
        'SHOPLIFTING-GRAND THEFT ($950.01 & OVER)',
        'BIKE - STOLEN',
        'THEFT, PERSON',
        'PURSE SNATCHING',
        'COUNTERFEIT',
    ),
    'Assault & Violence': (
        'INTIMATE PARTNER - SIMPLE ASSAULT',
        'INTIMATE PARTNER - AGGRAVATED ASSAULT',
        'BATTERY - SIMPLE ASSAULT',
        'CRIMINAL THREATS - NO WEAPON DISPLAYED',
        'ROBBERY',
        'ATTEMPTED ROBBERY',
        'ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT',
        'CRIMINAL HOMICIDE',
        'OTHER ASSAULT',
        'BATTERY POLICE (SIMPLE)',
        'ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER',
    ),
    'Property Crimes': (
        'BURGLARY',
        'VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)',
        'TRESPASSING',
        'BURGLARY FROM VEHICLE',
        'BURGLARY FROM VEHICLE, ATTEMPTED',
        'VEHICLE - STOLEN',
        'ARSON',
    ),
    'Sexual & Human Trafficking Crimes': (
        'HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE',
        'ORAL COPULATION',
        'INDECENT EXPOSURE',
        'PIMPING',
        'PANDERING',
    ),
    'Weapons & Firearms Crimes': (
        'BRANDISH WEAPON',
        'DISCHARGE FIREARMS/SHOTS FIRED',
        'WEAPONS POSSESSION/BOMBING',
        'SHOTS FIRED AT MOVING VEHICLE, TRAIN OR AIRCRAFT',
    ),
    'Public Order & Miscellaneous Crimes': (
        'DISTURBING THE PEACE',
        'CONTEMPT OF COURT',
        'FALSE POLICE REPORT',
        'RESISTING ARREST',
        'RECKLESS DRIVING',
        'DISRUPT SCHOOL',
    ),
    'Kidnapping & Restraining Order Violations': (
        'VIOLATION OF RESTRAINING ORDER',
        'VIOLATION OF TEMPORARY RESTRAINING ORDER',
        'KIDNAPPING',
        'FALSE IMPRISONMENT',
    ),
}

# Flat lookup used for Series.map: raw crime description -> broad category.
crime_mapping = {
    description: category
    for category, descriptions in _CRIME_CATEGORIES.items()
    for description in descriptions
}
# Collapse each raw crime description into its broad category; descriptions
# missing from the lookup table are labelled 'Other'.
mapped_crime_types = crimes['Crm Cd Desc'].map(crime_mapping)
crimes['Normalized_Crime_Type'] = mapped_crime_types.fillna('Other')

# Does crime type vary by time of day?
# Left-closed bins: Dawn [0, 6), Morning [6, 13), Afternoon [13, 18), Night [18, 25).
day_period_edges = [0, 6, 13, 18, 25]
day_period_labels = ['Dawn', 'Morning', 'Afternoon', 'Night']
crimes['DAY_PERIOD'] = pd.cut(
    x=crimes['HOUR'],
    bins=day_period_edges,
    labels=day_period_labels,
    right=False,
)

# Number of records for every (crime type, hour) combination.
crime_type_time = (
    crimes.groupby(['Normalized_Crime_Type', 'HOUR'])['DR_NO']
    .size()
    .reset_index()
    .rename(columns={'DR_NO': 'QUANTITY'})
)
sns.swarmplot(data=crime_type_time, x='HOUR', y='QUANTITY', hue='Normalized_Crime_Type')
plt.show()
# No, the crime type doesn't vary through the day.
# Normalizing weapon desc
# Broad weapon categories and the raw 'Weapon Desc' values assigned to each.
_WEAPON_CATEGORIES = {
    'Non-Physical Threat': (
        'VERBAL THREAT',
        'PHYSICAL PRESENCE',
        'DEMAND NOTE',
    ),
    'Hands/Feet/Body': (
        'STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)',
    ),
    'Firearm': (
        'HAND GUN',
        'SEMI-AUTOMATIC PISTOL',
        'SHOTGUN',
        'RIFLE',
        'ASSAULT WEAPON/UZI/AK47/ETC',
        'UNKNOWN FIREARM',
        'REVOLVER',
        'AIR PISTOL/REVOLVER/RIFLE/BB GUN',
        'OTHER FIREARM',
        'SAWED OFF RIFLE/SHOTGUN',
        'HECKLER & KOCH 93 SEMIAUTOMATIC ASSAULT RIFLE',
        'UNK TYPE SEMIAUTOMATIC ASSAULT RIFLE',
        'STARTER PISTOL/REVOLVER',
        'ANTIQUE FIREARM',
        'SEMI-AUTOMATIC RIFLE',
        'AUTOMATIC WEAPON/SUB-MACHINE GUN',
        'RELIC FIREARM',
        'M-14 SEMIAUTOMATIC ASSAULT RIFLE',
        'MAC-10 SEMIAUTOMATIC ASSAULT WEAPON',
    ),
    'Edged/Bladed Weapon': (
        'OTHER KNIFE',
        'KNIFE WITH BLADE 6INCHES OR LESS',
        'UNKNOWN TYPE CUTTING INSTRUMENT',
        'MACHETE',
        'RAZOR',
        'KNIFE WITH BLADE OVER 6 INCHES IN LENGTH',
        'FOLDING KNIFE',
        'RAZOR BLADE',
        'SCISSORS',
        'KITCHEN KNIFE',
        'SWITCH BLADE',
        'OTHER CUTTING INSTRUMENT',
        'ICE PICK',
        'DIRK/DAGGER',
        'AXE',
        'SCREWDRIVER',
        'SWORD',
        'STRAIGHT RAZOR',
        'CLEAVER',
        'BOWIE KNIFE',
    ),
    'Blunt Object': (
        'FIXED OBJECT',
        'BLUNT INSTRUMENT',
        'BOTTLE',
        'CLUB/BAT',
        'BELT FLAILING INSTRUMENT/CHAIN',
        'ROCK/THROWN OBJECT',
        'PIPE/METAL PIPE',
        'TIRE IRON',
        'STICK',
        'CONCRETE BLOCK/BRICK',
        'HAMMER',
        'GLASS',
        'BRASS KNUCKLES',
        'BOARD',
        'BLACKJACK',
    ),
    'Other/Unknown': (
        'UNKNOWN WEAPON/OTHER WEAPON',
        'SIMULATED GUN',
        'VEHICLE',
        'MACE/PEPPER SPRAY',
        'EXPLOXIVE DEVICE',
        'FIRE',
        'STUN GUN',
        'CAUSTIC CHEMICAL/POISON',
        'DOG/ANIMAL (SIC ANIMAL ON)',
        'TOY GUN',
        'BOW AND ARROW',
        'ROPE/LIGATURE',
        'SCALDING LIQUID',
        'LIQUOR/DRUGS',
        'BOMB THREAT',
        'SYRINGE',
    ),
}

# Flat lookup used for Series.map: raw weapon description -> broad category.
weapon_mapping = {
    description: category
    for category, descriptions in _WEAPON_CATEGORIES.items()
    for description in descriptions
}
def normalize_weapon(df, mapping=None, default='Other/Unknown'):
    """Add a 'Normalized_Weapon_Desc' column derived from 'Weapon Desc'.

    Each 'Weapon Desc' value is looked up in *mapping*; values that are
    missing (NaN) or absent from the mapping receive *default*.

    Args:
        df: DataFrame with a 'Weapon Desc' column. It is modified in place.
        mapping: dict of raw weapon description -> category. When omitted,
            the module-level ``weapon_mapping`` is used.
        default: Label for unmapped or missing descriptions.

    Returns:
        The same DataFrame object, with the new column added.
    """
    if mapping is None:
        # Resolved at call time so the module-level table is the default.
        mapping = weapon_mapping
    df['Normalized_Weapon_Desc'] = df['Weapon Desc'].map(mapping).fillna(default)
    return df
def fillna_mode(group):
    """Return *group* with missing values replaced by its most frequent value.

    When several values tie for most frequent, the first one returned by
    ``Series.mode()`` is used. When the group has no non-missing values there
    is nothing to impute with, and an unchanged copy is returned.

    Args:
        group: pandas Series (e.g. one group passed in by ``groupby.transform``).

    Returns:
        A new Series of the same length and index as *group*.
    """
    modes = group.mode()  # computed once; missing values are ignored by mode()
    if modes.empty:
        return group.copy()
    return group.fillna(modes.iloc[0])
# Most common weapons for day crimes and night crimes
# 'Weapon Desc' contains a lot of NaN values, so two approaches are compared:
#   1. disregard the rows with NaN, and
#   2. replace NaN with the mode of the same hour.

# Approach 1: keep only rows that report a weapon, then normalize them.
has_weapon = crimes['Weapon Desc'].notna()
crimes_without_nan = normalize_weapon(crimes[has_weapon].copy())
sns.countplot(data=crimes_without_nan, x='NIGHT_CRIME', hue='Normalized_Weapon_Desc')
plt.show()
# When the NaN values are disregarded, 'Hands/Feet/Body' becomes the most used
# 'weapon' in the crimes, both for day and night.

# Approach 2: within every hour, fill missing weapons with that hour's mode.
crimes_replace_nan = crimes.copy()
weapons_by_hour = crimes_replace_nan.groupby('HOUR')['Weapon Desc']
crimes_replace_nan['Weapon Desc'] = weapons_by_hour.transform(fillna_mode)
crimes_replace_nan = normalize_weapon(crimes_replace_nan)
sns.countplot(data=crimes_replace_nan, x='NIGHT_CRIME', hue='Normalized_Weapon_Desc')
plt.show()
# Using the mode of each hour to fill the NaN values, 'Non-Physical Threat'
# becomes the most used 'weapon' in the crimes.

# Double checking the NaN values: original data, approach 1, approach 2.
for frame in (crimes, crimes_without_nan, crimes_replace_nan):
    print(frame['Weapon Desc'].isna().sum())
# Is there any correlation between violent crimes and areas?
# Are certain areas more prone to violent crime versus property crimes?
# Boolean flags per record, so that summing them yields counts.
crimes['VIOLENT'] = (crimes['Normalized_Crime_Type'] == 'Assault & Violence')
crimes['PROPERTY'] = (crimes['Normalized_Crime_Type'] == 'Property Crimes')

# Both flag columns are selected with a list: indexing a GroupBy with a bare
# tuple of column names was deprecated in pandas 1.0 and raises in pandas 2.0+.
# The result is melted to long form (one row per area and crime type) so that
# seaborn can draw the two counts side by side.
crimes_prop_viol_area = (
    crimes.groupby(['AREA NAME'])[['VIOLENT', 'PROPERTY']]
    .sum()
    .reset_index()
    .melt(
        id_vars='AREA NAME',
        value_vars=['VIOLENT', 'PROPERTY'],
        var_name='CRIME TYPE',
        value_name='COUNT',
    )
)
sns.barplot(
    data=crimes_prop_viol_area,
    x='AREA NAME',
    y='COUNT',
    hue='CRIME TYPE',
)
plt.xticks(rotation=45)
plt.show()
# Overall, the areas have more violent crimes than property crimes, and
# specifically in '77th Street' the ratio is almost 3x.
# The area with the most property crimes is the 'Central' area.