Skip to content

Community Detection in Congress

Libraries

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

from networkx.algorithms.community import greedy_modularity_communities
from collections import defaultdict
from ast import literal_eval

Load & preprocess

# Load the two source tables; the first CSV column becomes the index.
members = pd.read_csv('members.csv', index_col=0)
bills = pd.read_csv('legislation.csv', index_col=0)

# Columns that hold lists are read from the CSV as plain strings;
# literal_eval turns each string back into a Python list of strings.
members['committee_assignments'] = members['committee_assignments'].apply(literal_eval)
for list_column in ('cosponsors', 'subjects', 'committees', 'related_bills'):
    bills[list_column] = bills[list_column].apply(literal_eval)

# Drop columns that are not used, then show each table
# (a bare expression only renders output when run as a notebook cell).
members.drop(columns='chamber', inplace=True)
members
bills.drop(columns=['date_introduced', 'number', 'bill_type'], inplace=True)
bills.head(8)

Explore the data

The following plots explore the distribution of bills by the party of the sponsoring member and the policy area of the bill. The first set shows bills introduced and bills passed by policy area, by party. The second set shows bills introduced/passed by committee, by party. The last plot breaks down committee membership by party.

# list of policy areas in bills (may include NaN for bills without one)
areas = bills.policy_area.unique()

# bill_progress values that count as "passed".
# A membership test is required here: `x == ('A' or 'B' or ...)` only ever
# compares against 'A', because `or` returns its first truthy operand.
PASSED_STATUSES = frozenset({
    'Passed House',
    'Passed Senate',
    'Became Law',
    'To President',
    'Agreed to in House',
    'Agreed to in Senate',
})

# position of each party's "introduced" counter inside a tally row;
# the matching "passed" counter sits immediately after it
PARTY_OFFSET = {'Democratic': 0, 'Republican': 2}

# tally per policy area: [D introduced, D passed, R introduced, R passed]
# Missing policy areas are left out up front instead of being tallied under
# a NaN key and dropped afterwards (NaN never compares equal to itself, so
# it is a fragile dict key, and dropping it fails when no area is missing).
dic = {area: [0, 0, 0, 0] for area in areas if pd.notna(area)}

# iterate through bills and tally them by policy area and sponsor party
for _, row in bills.iterrows():
    if pd.isna(row.policy_area):
        continue
    # look the sponsor's party up once per bill
    offset = PARTY_OFFSET.get(members.loc[row.sponsor].current_party)
    if offset is None:
        # sponsor is neither Democratic nor Republican: not tallied
        continue
    counts = dic[row.policy_area]
    counts[offset] += 1
    if row.bill_progress in PASSED_STATUSES:
        counts[offset + 1] += 1

# create dataframe from dictionary, rows = policy areas and columns = bill status and party
df = pd.DataFrame(
    list(dic.values()),
    index=list(dic),
    columns=['Democratic', 'Democrats_Passed', 'Republican', 'Republicans_Passed'],
).sort_values('Democratic', ascending=False)

# add row 'Total' holding the column sums over all policy areas
df.loc['Total'] = [df[column].sum() for column in df.columns]

# add column 'Total' for total bills introduced by policy area
df['Total'] = df.Democratic + df.Republican

# add column 'Total_Passed' for total bills passed by policy area
df['Total_Passed'] = df.Democrats_Passed + df.Republicans_Passed

# party shares are stored as fractions in [0, 1]; only '%_passed' is scaled to 0-100
df['D%_passed'] = df.Democrats_Passed / df.Total_Passed
df['R%_passed'] = df.Republicans_Passed / df.Total_Passed
df['D%_introduced'] = df['Democratic'] / df['Total']
df['R%_introduced'] = df['Republican'] / df['Total']
df['%_passed'] = 100 * (df['Total_Passed'] / df['Total'])

# 0/0 divisions above produce NaN; report those shares as 0
df.fillna(0, inplace=True)

# sort df by total introduced by policy area
df.sort_values(by=['Total'], ascending=False, inplace=True)
# set plot variables: the first n rows of df (sorted by 'Total' just above,
# so these are the rows with the most bills introduced)
n = 25
top = df.head(n)
x = top.index
y1 = top['D%_introduced']
y2 = top['R%_introduced']
total = top['Total']

# create a single axes for the stacked bars
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(12, 4), squeeze=True)

# stacked bars: Democratic share at the bottom, Republican share on top
ax1.bar(x, y1, label='Democrats', alpha=.5)
ax1.bar(x, y2, bottom=y1, label='Republicans', alpha=.5)

# label every non-empty segment with its share (in percent), centred
# vertically inside the segment
for share, centre in ((y1, y1 / 2), (y2, y1 + y2 / 2)):
    for xpos, ypos, yval in zip(x, centre, share):
        if yval > 0:
            ax1.text(xpos, ypos, str(round(yval * 100, 0)), ha="center", va="center", rotation=90)

# add total bills introduced above each bar
# (the loop target must not be called `total`: that would overwrite the
# Series being iterated)
for xpos, ypos, count in zip(x, y1 + y2, total):
    ax1.text(xpos, ypos + .05, count, ha="center", va="bottom")

# rotate x labels
for label in ax1.get_xticklabels():
    label.set_rotation(90)

# set title and legend
ax1.set_title('Bills Introduced by Party and Policy Area (%)')
ax1.legend(loc='upper right')

# bars top out at 1.0; the extra headroom leaves space for the count labels
# and legend. The y axis is hidden because the shares are printed on the bars.
ax1.set_ylim(0, 1.5)
ax1.yaxis.set_visible(False)

plt.show()
# create a single axes for the "bills passed" chart
fig, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(12, 4), squeeze=True)

# re-sort df by total passed by policy area
df.sort_values(by=['Total_Passed'], ascending=False, inplace=True)

# set plot variables: the n rows with the most bills passed
n = 20
top = df.head(n)
x = top.index
y1 = top['D%_passed']
y2 = top['R%_passed']
# NOTE(review): y3 is not referenced anywhere in this cell — confirm no
# later cell needs it before removing
y3 = top['Democratic'] + top['Republican']
total = top['Total_Passed']

# stacked bars showing each party's share (fraction) of bills passed per policy area
ax2.bar(x, y1, label='Democrats', alpha=.5)
ax2.bar(x, y2, bottom=y1, label='Republicans', alpha=.5)

# label every non-empty segment with its share (in percent), centred
# vertically inside the segment
for share, centre in ((y1, y1 / 2), (y2, y1 + y2 / 2)):
    for xpos, ypos, yval in zip(x, centre, share):
        if yval > 0:
            ax2.text(xpos, ypos, str(round(yval * 100, 0)), ha="center", va="center", rotation=90)

# add total number of bills passed above each bar
# (the loop target must not be called `total`: that would overwrite the
# Series being iterated)
for xpos, ypos, count in zip(x, y1 + y2, total):
    ax2.text(xpos, ypos + .05, count, ha="center", va="bottom")

# rotate x labels
for label in ax2.get_xticklabels():
    label.set_rotation(90)

# set title and legend
ax2.set_title('Bills Passed by Party and Policy Area (%)')
ax2.legend(loc='upper right')

# bars top out at 1.0; the extra headroom leaves space for the count labels
# and legend. The y axis is hidden because the shares are printed on the bars.
ax2.set_ylim(0, 1.5)
ax2.yaxis.set_visible(False)

plt.show()
# bill_progress values that count as "passed".
# A membership test is required here: `x == ('A' or 'B' or ...)` only ever
# compares against 'A', because `or` returns its first truthy operand.
PASSED_STATUSES = frozenset({
    'Passed House',
    'Passed Senate',
    'Became Law',
    'To President',
    'Agreed to in House',
    'Agreed to in Senate',
})

# unique House committees in order of first appearance (names without
# 'House' in them, i.e. Senate committees, are excluded);
# dict.fromkeys de-duplicates while keeping that order
committees = list(dict.fromkeys(
    committee
    for referred_to in bills.committees
    for committee in referred_to
    if 'House' in committee
))

# tally per committee: [D introduced, D passed, R introduced, R passed]
dic = {committee: [0, 0, 0, 0] for committee in committees}

# distinct bills per party (introduced / passed); dicts are used as ordered
# sets so the membership check is O(1) instead of scanning a list
dem_bills, dem_passed_bills = {}, {}
rep_bills, rep_passed_bills = {}, {}

# iterate through bills and tally each one against its House committees
for index, row in bills.iterrows():
    house_committees = [committee for committee in row.committees if 'House' in committee]
    if not house_committees:
        # bill was not referred to any House committee
        continue

    # look the sponsor's party up once per bill rather than once per committee
    party = members.loc[row.sponsor].current_party
    if party == 'Democratic':
        introduced_col, introduced_bills, passed_bills = 0, dem_bills, dem_passed_bills
    elif party == 'Republican':
        introduced_col, introduced_bills, passed_bills = 2, rep_bills, rep_passed_bills
    else:
        # sponsor is neither Democratic nor Republican: not tallied
        continue

    has_passed = row.bill_progress in PASSED_STATUSES

    # remember the bill itself (once, however many committees it has)
    introduced_bills[index] = None
    if has_passed:
        passed_bills[index] = None

    # a bill counts once for every House committee it was referred to;
    # the "passed" counter sits right after the "introduced" counter
    for committee in house_committees:
        dic[committee][introduced_col] += 1
        if has_passed:
            dic[committee][introduced_col + 1] += 1

# bill identifiers per party, in order of first appearance
Dems = list(dem_bills)
D_passed = list(dem_passed_bills)
Reps = list(rep_bills)
R_passed = list(rep_passed_bills)

# create dataframe from dictionary
df = pd.DataFrame(
    list(dic.values()),
    index=list(dic),
    columns=['Democratic', 'Democrats_Passed', 'Republican', 'Republicans_Passed'],
).sort_values('Democratic', ascending=False)

# add row 'Total' counting distinct bills; this is not the column sum,
# because a bill referred to several committees appears in several rows
df.loc['Total'] = [len(Dems), len(D_passed), len(Reps), len(R_passed)]

# add column 'Total' for all bills introduced regardless of party of sponsor
df['Total'] = df.Democratic + df.Republican

# add column 'Total_Passed' for all bills passed regardless of party of sponsor
df['Total_Passed'] = df.Democrats_Passed + df.Republicans_Passed

# party shares are stored as fractions in [0, 1]; only '%_passed' is scaled to 0-100
df['D%_passed'] = df.Democrats_Passed / df.Total_Passed
df['R%_passed'] = df.Republicans_Passed / df.Total_Passed
df['D%_introduced'] = df['Democratic'] / df['Total']
df['R%_introduced'] = df['Republican'] / df['Total']
df['%_passed'] = 100 * (df['Total_Passed'] / df['Total'])

# 0/0 divisions above produce NaN; report those shares as 0
df.fillna(0, inplace=True)

# sort by 'Total'
df.sort_values(by=['Total'], ascending=False, inplace=True)
# set plot variables: the first n rows of df (sorted by 'Total' just above,
# so these are the rows with the most bills introduced)
n = 25
top = df.head(n)
x = top.index
y1 = top['D%_introduced']
y2 = top['R%_introduced']
total = top['Total']

# stacked bars: Democratic share at the bottom, Republican share on top
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(12, 4), squeeze=True)
ax1.bar(x, y1, label='Democrats', alpha=.5)
ax1.bar(x, y2, bottom=y1, label='Republicans', alpha=.5)

# label every non-empty segment with its share (in percent), centred
# vertically inside the segment
for share, centre in ((y1, y1 / 2), (y2, y1 + y2 / 2)):
    for xpos, ypos, yval in zip(x, centre, share):
        if yval > 0:
            ax1.text(xpos, ypos, str(round(yval * 100, 0)), ha="center", va="center", rotation=90)

# add total bills introduced above each bar
# (the loop target must not be called `total`: that would overwrite the
# Series being iterated)
for xpos, ypos, count in zip(x, y1 + y2, total):
    ax1.text(xpos, ypos + .05, count, ha="center", va="bottom")

# rotate x labels
for label in ax1.get_xticklabels():
    label.set_rotation(90)

# set title and legend
ax1.set_title('Bills Introduced by Party and Committee (%)')
ax1.legend(loc='upper right')

# bars top out at 1.0; the extra headroom leaves space for the count labels
# and legend. The y axis is hidden because the shares are printed on the bars.
ax1.set_ylim(0, 1.5)
ax1.yaxis.set_visible(False)

plt.show()