Skip to content
Veteran Suicides
Veteran Suicides Analysis
I downloaded a dataset from the Department of Veterans Affairs with veterans suicide statistics from 2001-2021. I wanted to answer a few questions from this dataset:
- Is there a significant change in the number of suicides over time?
- Is there a significant difference in suicide rates between veterans and non-veterans?
- What is the breakdown in suicide rates by age group?
- Is there a significant difference in suicide rates between male veterans and female veterans?
- What is the breakdown in suicide rates by demographic group?
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats
import statsmodels.api as sm
# Load the three worksheets used by this analysis from the prepared workbook.
# Passing a list of sheet names makes pandas open and parse the file once and
# return a dict of DataFrames keyed by sheet name, instead of re-reading the
# workbook for every sheet.
_sheets = pd.read_excel(
    'va_suicides_prepped.xlsx',
    sheet_name=['Veteran', 'Non-Veteran', 'Veteran Race & Ethnicity'],
)
veterans = _sheets['Veteran']
non_veterans = _sheets['Non-Veteran']
by_demos = _sheets['Veteran Race & Ethnicity']

# Short snake_case column names, assigned positionally to the sheet's columns.
# NOTE(review): this assumes the sheet has exactly these 12 columns in this
# order -- confirm against the workbook.
new_cols = [
    'year', 'age_group', 'deaths', 'population', 'rate_per_100k',
    'male_deaths', 'male_population', 'male_rate_per_100k',
    'female_age_group', 'female_deaths', 'female_population', 'female_rate_per_100k',
]
veterans.columns = new_cols

# Force the female columns to numeric dtype; errors='coerce' turns any cell
# that cannot be parsed as a number into NaN instead of raising.
# (Presumably some cells hold placeholder text -- verify in the source data.)
for column in ('female_deaths', 'female_population', 'female_rate_per_100k'):
    veterans[column] = pd.to_numeric(veterans[column], errors='coerce')

veterans.info()
veterans.head()

# Keep only the rows whose age_group is 'All'.
# .copy() makes the subset an independent DataFrame, so the column assignments
# below modify it directly instead of writing to a slice of `veterans` (which
# raises SettingWithCopyWarning under pandas' classic copy/view semantics).
all_veterans = veterans[veterans['age_group'] == 'All'].copy()

# pct_change() is the fractional change relative to the previous row, so the
# first row of every growth column is NaN.
# NOTE(review): reading these as year-over-year growth assumes one row per
# year in chronological order -- confirm against the sheet.
all_veterans['deaths_growth'] = all_veterans['deaths'].pct_change()
all_veterans['rate_growth'] = all_veterans['rate_per_100k'].pct_change()
all_veterans['male_rate_growth'] = all_veterans['male_rate_per_100k'].pct_change()
all_veterans['female_rate_growth'] = all_veterans['female_rate_per_100k'].pct_change()
all_veterans

# Question 1
Studying veteran suicides and suicide rates over the time period.
def _first_year_with(column, value):
    """Return the 'year' of the first all_veterans row where `column` equals `value`."""
    matching_years = all_veterans.loc[all_veterans[column] == value, 'year']
    return matching_years.values[0]


# Lowest and highest death counts in the all-ages rows, and the years they occurred.
min_deaths = all_veterans['deaths'].min()
max_deaths = all_veterans['deaths'].max()
min_death_year = _first_year_with('deaths', min_deaths)
max_death_year = _first_year_with('deaths', max_deaths)
print(f'Min deaths: {min_deaths} in the year {min_death_year}')
print(f'Max deaths: {max_deaths} in the year {max_death_year}')

# Line chart of deaths by year; the y-axis is pinned to start at zero.
fig = px.line(
    data_frame=all_veterans,
    x='year',
    y='deaths',
    title='Veteran Suicides 2001-2021',
    range_y=(0, 7000),
)
fig.show()

# Line chart of the rate per 100k by year, also on a zero-based y-axis.
fig = px.line(
    data_frame=all_veterans,
    x='year',
    y='rate_per_100k',
    title='Veteran Suicide Rates 2001-2021',
    range_y=(0, 40),
)
fig.show()

# Lowest and highest rates per 100k, and the years they occurred.
min_rate = all_veterans['rate_per_100k'].min()
max_rate = all_veterans['rate_per_100k'].max()
min_rate_year = _first_year_with('rate_per_100k', min_rate)
max_rate_year = _first_year_with('rate_per_100k', max_rate)
print(f'Min death rate: {min_rate:.2f} in the year {min_rate_year}')
print(f'Max death rate: {max_rate:.2f} in the year {max_rate_year}')

# Arithmetic mean of the row-to-row growth fractions, expressed as percentages
# (mean() skips the NaN in the first row by default).
avg_deaths_growth = all_veterans['deaths_growth'].mean() * 100
avg_rate_growth = all_veterans['rate_growth'].mean() * 100
print(f"Average growth rate in suicides: {avg_deaths_growth:.2f}%")
print(f"Average growth rate in rates per 100k population: {avg_rate_growth:.2f}%.")

# Predictor for the regression on rate_per_100k that follows.
x = all_veterans['year']
# Ordinary least squares of rate_per_100k on year (`x`) with an intercept.
y = all_veterans['rate_per_100k']
X = sm.add_constant(x)  # prepends a 'const' column so the model has an intercept
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

# Design matrix for 2022-2026, built the same way as X (const + year),
# then scored with the fitted model.
next_five = pd.DataFrame({'year': list(range(2022, 2027))})
next_five = sm.add_constant(next_five)
predictions = model.predict(next_five)

# Year next to its predicted rate, rounded to one decimal place.
forecast = pd.DataFrame({
    'year': next_five['year'],
    'forecast': predictions.round(1),
})
forecast

# Question 2
Determining whether there is a significant difference in suicide rates between veterans and non-veterans.
Run cancelled
# Give the non-veteran sheet the same column names as the veteran sheet.
# NOTE(review): assumes both sheets share the same column layout -- confirm.
non_veterans.columns = new_cols
non_veterans.info()
non_veterans.head()
# Run cancelled  (notebook status text captured in the export)

# Keep only the rows whose age_group is 'All'.
# .copy() makes the subset an independent DataFrame, so the column assignments
# below modify it directly instead of writing to a slice of `non_veterans`
# (which raises SettingWithCopyWarning under pandas' classic copy/view semantics).
all_non_veterans = non_veterans[non_veterans['age_group'] == 'All'].copy()

# pct_change() is the fractional change relative to the previous row, so the
# first row of every growth column is NaN.
all_non_veterans['deaths_growth'] = all_non_veterans['deaths'].pct_change()
all_non_veterans['rate_growth'] = all_non_veterans['rate_per_100k'].pct_change()
all_non_veterans['male_rate_growth'] = all_non_veterans['male_rate_per_100k'].pct_change()
all_non_veterans['female_rate_growth'] = all_non_veterans['female_rate_per_100k'].pct_change()
all_non_veterans