Skip to content
New Workbook
Sign up
Extract Mentions and Hashtags from Tweets

Extract mentions and hashtags from tweets

Extract mentions and hashtags from your Twitter data with regular expressions. This is a helpful first step in topic identification.

# Load packages
import pandas as pd 
import nltk
from nltk.tokenize import regexp_tokenize

import pandas as pd

# Upload your data as a csv or json file and load it as a data frame 
import pandas as pd
# Prefer the CSV export and fall back to the JSON export only when the CSV
# cannot be read. OSError covers a missing or unreadable file; ValueError
# covers an empty or malformed CSV (pandas' parser errors derive from it).
# A bare `except:` would also swallow KeyboardInterrupt/SystemExit and hide
# unrelated bugs such as a NameError.
try:
    df = pd.read_csv('twitter.csv')
except (OSError, ValueError):
    # The JSON file is read with orient='index' and then transposed.
    df = pd.read_json('twitter.json', orient='index').T

# Preview the first rows of the loaded frame.
df.head()
# Regex for hashtags: '#' followed by one or more word characters.
pattern1 = r"#\w+"

# Regex for mentions: '@' followed by one or more word characters.
# It matches mentions only; hashtags are handled by pattern1.
pattern2 = r"(@\w+)"

# A missing text cell (e.g. NaN from an empty CSV field) is not a string and
# would make the tokenizer raise TypeError. Treat any non-string value as an
# empty tweet so that row simply gets empty lists.
tweets = [tweet if isinstance(tweet, str) else "" for tweet in df['text']]

# One list of matches per tweet, in row order.
hashtags = [regexp_tokenize(tweet, pattern1) for tweet in tweets]
mentions = [regexp_tokenize(tweet, pattern2) for tweet in tweets]

# Attach the per-tweet match lists as new columns and preview the result.
df['mentions'] = mentions
df['hashtags'] = hashtags
df.head()