Skip to content

Jump into the buzzing world of Twitter! 🐦 In this project, you'll explore the fascinating network of Twitter connections and uncover the hidden patterns within one of the most popular social networks out there. You'll get hands-on with real Twitter follower data using Pandas, transforming it into a cool directed graph with NetworkX. Along the way, you'll learn how to spot key influencers, find out who follows who, and discover mutual connections between users.

The Data

twitter-followers.csv

Column | Description
'FOLLOWER' | id of the user who is following
'FOLLOWEE' | id of the user being followed

Throughout the project, you will manipulate and organize this data to extract meaningful insights.

# Imports you'll need for the project
import pandas as pd
import networkx as nx

# Load the follower CSV into a DataFrame and build a directed graph from it.
# 'FOLLOWER' is passed as the source column and 'FOLLOWEE' as the target
# column, so each row becomes one edge pointing from follower to followee.
df = pd.read_csv('twitter-followers.csv')
T = nx.from_pandas_edgelist(df, 'FOLLOWER', 'FOLLOWEE', create_using=nx.DiGraph())

# Display head of the DataFrame
print(df.head())
# NOTE(review): bare expression; its value is discarded when run as a script.
# Presumably left over from a notebook cell that displayed the shape - confirm.
df.shape
# Optional graph drawing, currently disabled (would also need matplotlib):
# import networkx as nx
# import matplotlib.pyplot as plt

# nx.draw(T, with_labels=True)
# plt.show()
# NOTE(review): bare expression listing the names in the networkx module; the
# result is unused here. Presumably exploratory notebook output - confirm.
dir(nx)
def is_following(T, user_id1, user_id2):
    """
    Check whether user_id2 follows user_id1.

    A follow relationship is looked up as a directed edge in T that points
    from the follower to the account being followed, so this asks T for an
    edge from user_id2 to user_id1.

    Returns True if that edge exists, else False.
    """
    follower, followee = user_id2, user_id1
    return T.has_edge(follower, followee)
def get_users_following_min_accounts(T, min_following_count):
    """
    Collect the users who follow at least min_following_count accounts.

    T.out_degree() yields (user, out_degree) pairs; a user's out-degree is
    the number of accounts that user follows.

    Returns a list of the user IDs whose out-degree is greater than or equal
    to min_following_count, in the order T.out_degree() yields them.
    """
    return [
        user
        for user, following_count in T.out_degree()
        if following_count >= min_following_count
    ]
def get_mutual_followees(T, user_id1, user_id2):
    """
    Find the accounts that both user_id1 and user_id2 follow.

    The successors of a user in T are the accounts that user follows, so the
    answer is the intersection of the two users' successor sets.

    Returns a list of the shared user ids; the list order is not specified
    because it comes from a set.
    """
    followed_by_first = set(T.successors(user_id1))
    followed_by_second = set(T.successors(user_id2))
    shared = followed_by_first.intersection(followed_by_second)
    return list(shared)
def get_most_connected_user(T):
    """
    Find the user with the most connections in the directed graph T.

    A user's connection count is its in-degree (followers) plus its
    out-degree (followees). When several users tie for the maximum, the one
    that comes first in T.nodes() is returned.

    Returns the user id, or None if T has no nodes.
    """
    def connection_count(user):
        return T.in_degree(user) + T.out_degree(user)

    # max() keeps the first maximal element, so ties go to the earliest node.
    return max(T.nodes(), key=connection_count, default=None)